diff options
Diffstat (limited to 'compiler/rustc_parse/src')
21 files changed, 1032 insertions, 538 deletions
diff --git a/compiler/rustc_parse/src/errors.rs b/compiler/rustc_parse/src/errors.rs index abaff7d9c19..20bcefd4fe1 100644 --- a/compiler/rustc_parse/src/errors.rs +++ b/compiler/rustc_parse/src/errors.rs @@ -1,3 +1,5 @@ +// ignore-tidy-filelength + use std::borrow::Cow; use rustc_ast::token::Token; @@ -260,7 +262,7 @@ pub(crate) struct NotAsNegationOperator { } #[derive(Subdiagnostic)] -pub enum NotAsNegationOperatorSub { +pub(crate) enum NotAsNegationOperatorSub { #[suggestion( parse_unexpected_token_after_not_default, style = "verbose", @@ -424,7 +426,7 @@ pub(crate) enum IfExpressionMissingThenBlockSub { #[derive(Diagnostic)] #[diag(parse_ternary_operator)] #[help] -pub struct TernaryOperator { +pub(crate) struct TernaryOperator { #[primary_span] pub span: Span, } @@ -1088,7 +1090,7 @@ pub(crate) enum ExpectedIdentifierFound { } impl ExpectedIdentifierFound { - pub fn new(token_descr: Option<TokenDescription>, span: Span) -> Self { + pub(crate) fn new(token_descr: Option<TokenDescription>, span: Span) -> Self { (match token_descr { Some(TokenDescription::ReservedIdentifier) => { ExpectedIdentifierFound::ReservedIdentifier @@ -1114,25 +1116,19 @@ impl<'a, G: EmissionGuarantee> Diagnostic<'a, G> for ExpectedIdentifier { fn into_diag(self, dcx: DiagCtxtHandle<'a>, level: Level) -> Diag<'a, G> { let token_descr = TokenDescription::from_token(&self.token); - let mut diag = Diag::new( - dcx, - level, - match token_descr { - Some(TokenDescription::ReservedIdentifier) => { - fluent::parse_expected_identifier_found_reserved_identifier_str - } - Some(TokenDescription::Keyword) => { - fluent::parse_expected_identifier_found_keyword_str - } - Some(TokenDescription::ReservedKeyword) => { - fluent::parse_expected_identifier_found_reserved_keyword_str - } - Some(TokenDescription::DocComment) => { - fluent::parse_expected_identifier_found_doc_comment_str - } - None => fluent::parse_expected_identifier_found_str, - }, - ); + let mut diag = Diag::new(dcx, level, match token_descr { + Some(TokenDescription::ReservedIdentifier) => { + fluent::parse_expected_identifier_found_reserved_identifier_str + } + Some(TokenDescription::Keyword) => fluent::parse_expected_identifier_found_keyword_str, + Some(TokenDescription::ReservedKeyword) => { + fluent::parse_expected_identifier_found_reserved_keyword_str + } + Some(TokenDescription::DocComment) => { + fluent::parse_expected_identifier_found_doc_comment_str + } + None => fluent::parse_expected_identifier_found_str, + }); diag.span(self.span); diag.arg("token", self.token); @@ -1174,23 +1170,17 @@ impl<'a, G: EmissionGuarantee> Diagnostic<'a, G> for ExpectedSemi { fn into_diag(self, dcx: DiagCtxtHandle<'a>, level: Level) -> Diag<'a, G> { let token_descr = TokenDescription::from_token(&self.token); - let mut diag = Diag::new( - dcx, - level, - match token_descr { - Some(TokenDescription::ReservedIdentifier) => { - fluent::parse_expected_semi_found_reserved_identifier_str - } - Some(TokenDescription::Keyword) => fluent::parse_expected_semi_found_keyword_str, - Some(TokenDescription::ReservedKeyword) => { - fluent::parse_expected_semi_found_reserved_keyword_str - } - Some(TokenDescription::DocComment) => { - fluent::parse_expected_semi_found_doc_comment_str - } - None => fluent::parse_expected_semi_found_str, - }, - ); + let mut diag = Diag::new(dcx, level, match token_descr { + Some(TokenDescription::ReservedIdentifier) => { + fluent::parse_expected_semi_found_reserved_identifier_str + } + Some(TokenDescription::Keyword) => fluent::parse_expected_semi_found_keyword_str, + Some(TokenDescription::ReservedKeyword) => { + fluent::parse_expected_semi_found_reserved_keyword_str + } + Some(TokenDescription::DocComment) => fluent::parse_expected_semi_found_doc_comment_str, + None => fluent::parse_expected_semi_found_str, + }); diag.span(self.span); diag.arg("token", self.token); @@ -1569,7 +1559,7 @@ pub(crate) struct ExpectedFnPathFoundFnKeyword { } #[derive(Diagnostic)] -#[diag(parse_path_single_colon)] +#[diag(parse_path_double_colon)] pub(crate) struct PathSingleColon { #[primary_span] pub span: Span, @@ -1582,6 +1572,14 @@ pub(crate) struct PathSingleColon { } #[derive(Diagnostic)] +#[diag(parse_path_double_colon)] +pub(crate) struct PathTripleColon { + #[primary_span] + #[suggestion(applicability = "maybe-incorrect", code = "", style = "verbose")] + pub span: Span, +} + +#[derive(Diagnostic)] #[diag(parse_colon_as_semi)] pub(crate) struct ColonAsSemi { #[primary_span] @@ -1659,7 +1657,7 @@ pub(crate) struct SelfArgumentPointer { #[derive(Diagnostic)] #[diag(parse_unexpected_token_after_dot)] -pub struct UnexpectedTokenAfterDot<'a> { +pub(crate) struct UnexpectedTokenAfterDot<'a> { #[primary_span] pub span: Span, pub actual: Cow<'a, str>, @@ -1928,7 +1926,7 @@ pub(crate) enum UnexpectedTokenAfterStructName { } impl UnexpectedTokenAfterStructName { - pub fn new(span: Span, token: Token) -> Self { + pub(crate) fn new(span: Span, token: Token) -> Self { match TokenDescription::from_token(&token) { Some(TokenDescription::ReservedIdentifier) => Self::ReservedIdentifier { span, token }, Some(TokenDescription::Keyword) => Self::Keyword { span, token }, @@ -2006,7 +2004,7 @@ pub(crate) enum TopLevelOrPatternNotAllowed { #[derive(Diagnostic)] #[diag(parse_cannot_be_raw_ident)] -pub struct CannotBeRawIdent { +pub(crate) struct CannotBeRawIdent { #[primary_span] pub span: Span, pub ident: Symbol, @@ -2014,14 +2012,14 @@ pub struct CannotBeRawIdent { #[derive(Diagnostic)] #[diag(parse_keyword_lifetime)] -pub struct KeywordLifetime { +pub(crate) struct KeywordLifetime { #[primary_span] pub span: Span, } #[derive(Diagnostic)] #[diag(parse_invalid_label)] -pub struct InvalidLabel { +pub(crate) struct InvalidLabel { #[primary_span] pub span: Span, pub name: Symbol, @@ -2029,7 +2027,7 @@ pub struct InvalidLabel { #[derive(Diagnostic)] #[diag(parse_cr_doc_comment)] -pub struct CrDocComment { +pub(crate) struct CrDocComment { #[primary_span] pub span: Span, pub block: bool, @@ -2037,14 +2035,14 @@ pub struct CrDocComment { #[derive(Diagnostic)] #[diag(parse_no_digits_literal, code = E0768)] -pub struct NoDigitsLiteral { +pub(crate) struct NoDigitsLiteral { #[primary_span] pub span: Span, } #[derive(Diagnostic)] #[diag(parse_invalid_digit_literal)] -pub struct InvalidDigitLiteral { +pub(crate) struct InvalidDigitLiteral { #[primary_span] pub span: Span, pub base: u32, @@ -2052,14 +2050,14 @@ pub struct InvalidDigitLiteral { #[derive(Diagnostic)] #[diag(parse_empty_exponent_float)] -pub struct EmptyExponentFloat { +pub(crate) struct EmptyExponentFloat { #[primary_span] pub span: Span, } #[derive(Diagnostic)] #[diag(parse_float_literal_unsupported_base)] -pub struct FloatLiteralUnsupportedBase { +pub(crate) struct FloatLiteralUnsupportedBase { #[primary_span] pub span: Span, pub base: &'static str, @@ -2068,7 +2066,7 @@ pub struct FloatLiteralUnsupportedBase { #[derive(Diagnostic)] #[diag(parse_unknown_prefix)] #[note] -pub struct UnknownPrefix<'a> { +pub(crate) struct UnknownPrefix<'a> { #[primary_span] #[label] pub span: Span, @@ -2079,12 +2077,12 @@ pub struct UnknownPrefix<'a> { #[derive(Subdiagnostic)] #[note(parse_macro_expands_to_adt_field)] -pub struct MacroExpandsToAdtField<'a> { +pub(crate) struct MacroExpandsToAdtField<'a> { pub adt_ty: &'a str, } #[derive(Subdiagnostic)] -pub enum UnknownPrefixSugg { +pub(crate) enum UnknownPrefixSugg { #[suggestion( parse_suggestion_br, code = "br", @@ -2114,7 +2112,7 @@ pub enum UnknownPrefixSugg { #[derive(Diagnostic)] #[diag(parse_too_many_hashes)] -pub struct TooManyHashes { +pub(crate) struct TooManyHashes { #[primary_span] pub span: Span, pub num: u32, @@ -2122,7 +2120,7 @@ pub struct TooManyHashes { #[derive(Diagnostic)] #[diag(parse_unknown_start_of_token)] -pub struct UnknownTokenStart { +pub(crate) struct UnknownTokenStart { #[primary_span] pub span: Span, pub escaped: String, @@ -2135,7 +2133,7 @@ pub struct UnknownTokenStart { } #[derive(Subdiagnostic)] -pub enum TokenSubstitution { +pub(crate) enum TokenSubstitution { #[suggestion( parse_sugg_quotes, code = "{suggestion}", @@ -2168,16 +2166,16 @@ pub enum TokenSubstitution { #[derive(Subdiagnostic)] #[note(parse_note_repeats)] -pub struct UnknownTokenRepeat { +pub(crate) struct UnknownTokenRepeat { pub repeats: usize, } #[derive(Subdiagnostic)] #[help(parse_help_null)] -pub struct UnknownTokenNull; +pub(crate) struct UnknownTokenNull; #[derive(Diagnostic)] -pub enum UnescapeError { +pub(crate) enum UnescapeError { #[diag(parse_invalid_unicode_escape)] #[help] InvalidUnicodeEscape { @@ -2322,7 +2320,7 @@ pub enum UnescapeError { } #[derive(Subdiagnostic)] -pub enum MoreThanOneCharSugg { +pub(crate) enum MoreThanOneCharSugg { #[suggestion( parse_consider_normalized, code = "{normalized}", @@ -2370,7 +2368,7 @@ pub enum MoreThanOneCharSugg { } #[derive(Subdiagnostic)] -pub enum MoreThanOneCharNote { +pub(crate) enum MoreThanOneCharNote { #[note(parse_followed_by)] AllCombining { #[primary_span] @@ -2388,7 +2386,7 @@ pub enum MoreThanOneCharNote { } #[derive(Subdiagnostic)] -pub enum NoBraceUnicodeSub { +pub(crate) enum NoBraceUnicodeSub { #[suggestion( parse_use_braces, code = "{suggestion}", @@ -2592,13 +2590,86 @@ pub(crate) struct ExpectedCommaAfterPatternField { #[derive(Diagnostic)] #[diag(parse_unexpected_expr_in_pat)] pub(crate) struct UnexpectedExpressionInPattern { + /// The unexpected expr's span. #[primary_span] #[label] pub span: Span, /// Was a `RangePatternBound` expected? pub is_bound: bool, - /// Was the unexpected expression a `MethodCallExpression`? - pub is_method_call: bool, + /// The unexpected expr's precedence (used in match arm guard suggestions). + pub expr_precedence: i8, +} + +#[derive(Subdiagnostic)] +pub(crate) enum UnexpectedExpressionInPatternSugg { + #[multipart_suggestion( + parse_unexpected_expr_in_pat_create_guard_sugg, + applicability = "maybe-incorrect" + )] + CreateGuard { + /// Where to put the suggested identifier. + #[suggestion_part(code = "{ident}")] + ident_span: Span, + /// Where to put the match arm. + #[suggestion_part(code = " if {ident} == {expr}")] + pat_hi: Span, + /// The suggested identifier. + ident: String, + /// The unexpected expression. + expr: String, + }, + + #[multipart_suggestion( + parse_unexpected_expr_in_pat_update_guard_sugg, + applicability = "maybe-incorrect" + )] + UpdateGuard { + /// Where to put the suggested identifier. + #[suggestion_part(code = "{ident}")] + ident_span: Span, + /// The beginning of the match arm guard's expression (insert a `(` if `Some`). + #[suggestion_part(code = "(")] + guard_lo: Option<Span>, + /// The end of the match arm guard's expression. + #[suggestion_part(code = "{guard_hi_paren} && {ident} == {expr}")] + guard_hi: Span, + /// Either `")"` or `""`. + guard_hi_paren: &'static str, + /// The suggested identifier. + ident: String, + /// The unexpected expression. + expr: String, + }, + + #[multipart_suggestion( + parse_unexpected_expr_in_pat_const_sugg, + applicability = "has-placeholders" + )] + Const { + /// Where to put the extracted constant declaration. + #[suggestion_part(code = "{indentation}const {ident}: /* Type */ = {expr};\n")] + stmt_lo: Span, + /// Where to put the suggested identifier. + #[suggestion_part(code = "{ident}")] + ident_span: Span, + /// The suggested identifier. + ident: String, + /// The unexpected expression. + expr: String, + /// The statement's block's indentation. + indentation: String, + }, + + #[multipart_suggestion( + parse_unexpected_expr_in_pat_inline_const_sugg, + applicability = "maybe-incorrect" + )] + InlineConst { + #[suggestion_part(code = "const {{ ")] + start_span: Span, + #[suggestion_part(code = " }}")] + end_span: Span, + }, } #[derive(Diagnostic)] @@ -2703,7 +2774,7 @@ pub(crate) struct InvalidDynKeyword { } #[derive(Subdiagnostic)] -pub enum HelpUseLatestEdition { +pub(crate) enum HelpUseLatestEdition { #[help(parse_help_set_edition_cargo)] #[note(parse_note_edition_guide)] Cargo { edition: Edition }, @@ -2713,7 +2784,7 @@ pub enum HelpUseLatestEdition { } impl HelpUseLatestEdition { - pub fn new() -> Self { + pub(crate) fn new() -> Self { let edition = LATEST_STABLE_EDITION; if rustc_session::utils::was_invoked_from_cargo() { Self::Cargo { edition } @@ -2725,7 +2796,7 @@ impl HelpUseLatestEdition { #[derive(Diagnostic)] #[diag(parse_box_syntax_removed)] -pub struct BoxSyntaxRemoved { +pub(crate) struct BoxSyntaxRemoved { #[primary_span] pub span: Span, #[subdiagnostic] @@ -2738,7 +2809,7 @@ pub struct BoxSyntaxRemoved { applicability = "machine-applicable", style = "verbose" )] -pub struct AddBoxNew { +pub(crate) struct AddBoxNew { #[suggestion_part(code = "Box::new(")] pub box_kw_and_lo: Span, #[suggestion_part(code = ")")] @@ -3190,7 +3261,7 @@ pub(crate) struct DotDotRangeAttribute { #[derive(Diagnostic)] #[diag(parse_invalid_attr_unsafe)] #[note] -pub struct InvalidAttrUnsafe { +pub(crate) struct InvalidAttrUnsafe { #[primary_span] #[label] pub span: Span, @@ -3199,7 +3270,7 @@ pub struct InvalidAttrUnsafe { #[derive(Diagnostic)] #[diag(parse_unsafe_attr_outside_unsafe)] -pub struct UnsafeAttrOutsideUnsafe { +pub(crate) struct UnsafeAttrOutsideUnsafe { #[primary_span] #[label] pub span: Span, @@ -3212,7 +3283,7 @@ pub struct UnsafeAttrOutsideUnsafe { parse_unsafe_attr_outside_unsafe_suggestion, applicability = "machine-applicable" )] -pub struct UnsafeAttrOutsideUnsafeSuggestion { +pub(crate) struct UnsafeAttrOutsideUnsafeSuggestion { #[suggestion_part(code = "unsafe(")] pub left: Span, #[suggestion_part(code = ")")] @@ -3221,7 +3292,7 @@ pub struct UnsafeAttrOutsideUnsafeSuggestion { #[derive(Diagnostic)] #[diag(parse_binder_before_modifiers)] -pub struct BinderBeforeModifiers { +pub(crate) struct BinderBeforeModifiers { #[primary_span] pub binder_span: Span, #[label] @@ -3230,7 +3301,7 @@ pub struct BinderBeforeModifiers { #[derive(Diagnostic)] #[diag(parse_binder_and_polarity)] -pub struct BinderAndPolarity { +pub(crate) struct BinderAndPolarity { #[primary_span] pub polarity_span: Span, #[label] @@ -3240,7 +3311,7 @@ pub struct BinderAndPolarity { #[derive(Diagnostic)] #[diag(parse_modifiers_and_polarity)] -pub struct PolarityAndModifiers { +pub(crate) struct PolarityAndModifiers { #[primary_span] pub polarity_span: Span, #[label] diff --git a/compiler/rustc_parse/src/lexer/diagnostics.rs b/compiler/rustc_parse/src/lexer/diagnostics.rs index 4d5d1ce099e..41108c91f2e 100644 --- a/compiler/rustc_parse/src/lexer/diagnostics.rs +++ b/compiler/rustc_parse/src/lexer/diagnostics.rs @@ -1,7 +1,7 @@ use rustc_ast::token::Delimiter; use rustc_errors::Diag; -use rustc_span::source_map::SourceMap; use rustc_span::Span; +use rustc_span::source_map::SourceMap; use super::UnmatchedDelim; diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index f30939093c2..3e46fc93fa4 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -8,12 +8,11 @@ use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; use rustc_lexer::unescape::{self, EscapeError, Mode}; use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError}; +use rustc_session::lint::BuiltinLintDiag; use rustc_session::lint::builtin::{ RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT, }; -use rustc_session::lint::BuiltinLintDiag; use rustc_session::parse::ParseSess; -use rustc_span::edition::Edition; use rustc_span::symbol::Symbol; use rustc_span::{BytePos, Pos, Span}; use tracing::debug; @@ -188,9 +187,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { preceded_by_whitespace = true; continue; } - rustc_lexer::TokenKind::Ident => { - self.ident(start) - } + rustc_lexer::TokenKind::Ident => self.ident(start), rustc_lexer::TokenKind::RawIdent => { let sym = nfc_normalize(self.str_from(start + BytePos(2))); let span = self.mk_sp(start, self.pos); @@ -205,20 +202,31 @@ impl<'psess, 'src> StringReader<'psess, 'src> { self.report_unknown_prefix(start); self.ident(start) } - rustc_lexer::TokenKind::InvalidIdent - | rustc_lexer::TokenKind::InvalidPrefix + rustc_lexer::TokenKind::UnknownPrefixLifetime => { + self.report_unknown_prefix(start); + // Include the leading `'` in the real identifier, for macro + // expansion purposes. See #12512 for the gory details of why + // this is necessary. + let lifetime_name = self.str_from(start); + self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident, IdentIsRaw::No) + } + rustc_lexer::TokenKind::InvalidIdent | rustc_lexer::TokenKind::InvalidPrefix // Do not recover an identifier with emoji if the codepoint is a confusable // with a recoverable substitution token, like `➖`. - if !UNICODE_ARRAY - .iter() - .any(|&(c, _, _)| { - let sym = self.str_from(start); - sym.chars().count() == 1 && c == sym.chars().next().unwrap() - }) => + if !UNICODE_ARRAY.iter().any(|&(c, _, _)| { + let sym = self.str_from(start); + sym.chars().count() == 1 && c == sym.chars().next().unwrap() + }) => { let sym = nfc_normalize(self.str_from(start)); let span = self.mk_sp(start, self.pos); - self.psess.bad_unicode_identifiers.borrow_mut().entry(sym).or_default() + self.psess + .bad_unicode_identifiers + .borrow_mut() + .entry(sym) + .or_default() .push(span); token::Ident(sym, IdentIsRaw::No) } @@ -249,9 +257,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let suffix = if suffix_start < self.pos { let string = self.str_from(suffix_start); if string == "_" { - self - .dcx() - .emit_err(errors::UnderscoreLiteralSuffix { span: self.mk_sp(suffix_start, self.pos) }); + self.dcx().emit_err(errors::UnderscoreLiteralSuffix { + span: self.mk_sp(suffix_start, self.pos), + }); None } else { Some(Symbol::intern(string)) @@ -269,12 +277,50 @@ impl<'psess, 'src> StringReader<'psess, 'src> { self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); if starts_with_number { let span = self.mk_sp(start, self.pos); - self.dcx().struct_err("lifetimes cannot start with a number") + self.dcx() + .struct_err("lifetimes cannot start with a number") .with_span(span) .stash(span, StashKey::LifetimeIsChar); } let ident = Symbol::intern(lifetime_name); - token::Lifetime(ident) + token::Lifetime(ident, IdentIsRaw::No) + } + rustc_lexer::TokenKind::RawLifetime => { + self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); + + let ident_start = start + BytePos(3); + let prefix_span = self.mk_sp(start, ident_start); + + if prefix_span.at_least_rust_2021() { + let lifetime_name_without_tick = self.str_from(ident_start); + // Put the `'` back onto the lifetime name. + let mut lifetime_name = String::with_capacity(lifetime_name_without_tick.len() + 1); + lifetime_name.push('\''); + lifetime_name += lifetime_name_without_tick; + let sym = Symbol::intern(&lifetime_name); + + // Make sure we mark this as a raw identifier. + self.psess.raw_identifier_spans.push(self.mk_sp(start, self.pos)); + + token::Lifetime(sym, IdentIsRaw::Yes) + } else { + // Otherwise, this should be parsed like `'r`. Warn about it though. + self.psess.buffer_lint( + RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, + prefix_span, + ast::CRATE_NODE_ID, + BuiltinLintDiag::RawPrefix(prefix_span), + ); + + // Reset the state so we just lex the `'r`. + let lt_start = start + BytePos(2); + self.pos = lt_start; + self.cursor = Cursor::new(&str_before[2 as usize..]); + + let lifetime_name = self.str_from(start); + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident, IdentIsRaw::No) + } } rustc_lexer::TokenKind::Semi => token::Semi, rustc_lexer::TokenKind::Comma => token::Comma, @@ -331,16 +377,19 @@ impl<'psess, 'src> StringReader<'psess, 'src> { // first remove compound tokens like `<<` from `rustc_lexer`, and then add // fancier error recovery to it, as there will be less overall work to do this // way. - let (token, sugg) = unicode_chars::check_for_substitution(self, start, c, repeats+1); + let (token, sugg) = + unicode_chars::check_for_substitution(self, start, c, repeats + 1); self.dcx().emit_err(errors::UnknownTokenStart { span: self.mk_sp(start, self.pos + Pos::from_usize(repeats * c.len_utf8())), escaped: escaped_char(c), sugg, - null: if c == '\x00' {Some(errors::UnknownTokenNull)} else {None}, + null: if c == '\x00' { Some(errors::UnknownTokenNull) } else { None }, repeat: if repeats > 0 { swallow_next_invalid = repeats; Some(errors::UnknownTokenRepeat { repeats }) - } else {None} + } else { + None + }, }); if let Some(token) = token { @@ -699,7 +748,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { let expn_data = prefix_span.ctxt().outer_expn_data(); - if expn_data.edition >= Edition::Edition2021 { + if expn_data.edition.at_least_rust_2021() { // In Rust 2021, this is a hard error. let sugg = if prefix == "rb" { Some(errors::UnknownPrefixSugg::UseBr(prefix_span)) @@ -817,7 +866,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> { } pub fn nfc_normalize(string: &str) -> Symbol { - use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization}; + use unicode_normalization::{IsNormalized, UnicodeNormalization, is_nfc_quick}; match is_nfc_quick(string.chars()) { IsNormalized::Yes => Symbol::intern(string), _ => { diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs index e7a7105803f..d35c9c7bae7 100644 --- a/compiler/rustc_parse/src/lexer/tokentrees.rs +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -5,7 +5,7 @@ use rustc_errors::{Applicability, PErr}; use rustc_span::symbol::kw; use super::diagnostics::{ - report_suspicious_mismatch_block, same_indentation_level, TokenTreeDiagInfo, + TokenTreeDiagInfo, report_suspicious_mismatch_block, same_indentation_level, }; use super::{StringReader, UnmatchedDelim}; use crate::Parser; @@ -299,7 +299,7 @@ impl<'psess, 'src> TokenTreesReader<'psess, 'src> { } return diff_errs; } - return errs; + errs } fn close_delim_err(&mut self, delim: Delimiter) -> PErr<'psess> { diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index efa53f0962b..2e066f0179c 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -40,8 +40,8 @@ pub(crate) fn emit_unescape_error( dcx.emit_err(UnescapeError::InvalidUnicodeEscape { span: err_span, surrogate: false }) } EscapeError::MoreThanOneChar => { - use unicode_normalization::char::is_combining_mark; use unicode_normalization::UnicodeNormalization; + use unicode_normalization::char::is_combining_mark; let mut sugg = None; let mut note = None; diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs index 37079271493..f7a8b8780ed 100644 --- a/compiler/rustc_parse/src/lib.rs +++ b/compiler/rustc_parse/src/lib.rs @@ -11,13 +11,14 @@ #![feature(if_let_guard)] #![feature(iter_intersperse)] #![feature(let_chains)] +#![warn(unreachable_pub)] // tidy-alphabetical-end use std::path::Path; use rustc_ast as ast; use rustc_ast::tokenstream::TokenStream; -use rustc_ast::{token, AttrItem, Attribute, MetaItem}; +use rustc_ast::{AttrItem, Attribute, MetaItem, token}; use rustc_ast_pretty::pprust; use rustc_data_structures::sync::Lrc; use rustc_errors::{Diag, FatalError, PResult}; @@ -28,7 +29,7 @@ pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments"); #[macro_use] pub mod parser; -use parser::{make_unclosed_delims_error, Parser}; +use parser::{Parser, make_unclosed_delims_error}; pub mod lexer; pub mod validate_attr; diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index 6391ff901cb..c65cf3f40f6 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -76,6 +76,7 @@ impl<'a> Parser<'a> { token::CommentKind::Line => OuterAttributeType::DocComment, token::CommentKind::Block => OuterAttributeType::DocBlockComment, }, + true, ) { err.note(fluent::parse_note); err.span_suggestion_verbose( @@ -139,7 +140,11 @@ impl<'a> Parser<'a> { // Emit error if inner attribute is encountered and forbidden. if style == ast::AttrStyle::Inner { - this.error_on_forbidden_inner_attr(attr_sp, inner_parse_policy); + this.error_on_forbidden_inner_attr( + attr_sp, + inner_parse_policy, + item.is_valid_for_outer_style(), + ); } Ok(attr::mk_attr_from_item(&self.psess.attr_id_generator, item, None, style, attr_sp)) @@ -151,6 +156,7 @@ impl<'a> Parser<'a> { err: &mut Diag<'_>, span: Span, attr_type: OuterAttributeType, + suggest_to_outer: bool, ) -> Option<Span> { let mut snapshot = self.create_snapshot_for_diagnostic(); let lo = span.lo() @@ -185,16 +191,18 @@ impl<'a> Parser<'a> { // FIXME(#100717) err.arg("item", item.kind.descr()); err.span_label(item.span, fluent::parse_label_does_not_annotate_this); - err.span_suggestion_verbose( - replacement_span, - fluent::parse_sugg_change_inner_to_outer, - match attr_type { - OuterAttributeType::Attribute => "", - OuterAttributeType::DocBlockComment => "*", - OuterAttributeType::DocComment => "/", - }, - rustc_errors::Applicability::MachineApplicable, - ); + if suggest_to_outer { + err.span_suggestion_verbose( + replacement_span, + fluent::parse_sugg_change_inner_to_outer, + match attr_type { + OuterAttributeType::Attribute => "", + OuterAttributeType::DocBlockComment => "*", + OuterAttributeType::DocComment => "/", + }, + rustc_errors::Applicability::MachineApplicable, + ); + } return None; } Err(item_err) => { @@ -205,7 +213,12 @@ impl<'a> Parser<'a> { Some(replacement_span) } - pub(super) fn error_on_forbidden_inner_attr(&self, attr_sp: Span, policy: InnerAttrPolicy) { + pub(super) fn error_on_forbidden_inner_attr( + &self, + attr_sp: Span, + policy: InnerAttrPolicy, + suggest_to_outer: bool, + ) { if let InnerAttrPolicy::Forbidden(reason) = policy { let mut diag = match reason.as_ref().copied() { Some(InnerAttrForbiddenReason::AfterOuterDocComment { prev_doc_comment_span }) => { @@ -239,6 +252,7 @@ impl<'a> Parser<'a> { &mut diag, attr_sp, OuterAttributeType::Attribute, + suggest_to_outer, ) .is_some() { diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 49df2811d52..8bd615e6d79 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::{iter, mem}; use rustc_ast::token::{Delimiter, Token, TokenKind}; @@ -6,9 +7,10 @@ use rustc_ast::tokenstream::{ Spacing, ToAttrTokenStream, }; use rustc_ast::{self as ast, AttrVec, Attribute, HasAttrs, HasTokens}; +use rustc_data_structures::fx::FxHashSet; use rustc_errors::PResult; use rustc_session::parse::ParseSess; -use rustc_span::{sym, Span, DUMMY_SP}; +use rustc_span::{DUMMY_SP, Span, sym}; use super::{ Capturing, FlatToken, ForceCollect, NodeRange, NodeReplacement, Parser, ParserRange, @@ -106,7 +108,7 @@ struct LazyAttrTokenStreamImpl { start_token: (Token, Spacing), cursor_snapshot: TokenCursor, num_calls: u32, - break_last_token: bool, + break_last_token: u32, node_replacements: Box<[NodeReplacement]>, } @@ -134,9 +136,8 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { node_replacements.array_windows() { assert!( - node_range.0.end <= next_node_range.0.start - || node_range.0.end >= next_node_range.0.end, - "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", + node_range.0.end <= next_node_range.0.start, + "Node ranges should be disjoint: ({:?}, {:?}) ({:?}, {:?})", node_range, tokens, next_node_range, @@ -144,20 +145,8 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl { ); } - // Process the replace ranges, starting from the highest start - // position and working our way back. If have tokens like: - // - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // Then we will generate replace ranges for both - // the `#[cfg(FALSE)] field: bool` and the entire - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // By starting processing from the replace range with the greatest - // start position, we ensure that any (outer) replace range which - // encloses another (inner) replace range will fully overwrite the - // inner range's replacement. - for (node_range, target) in node_replacements.into_iter().rev() { + // Process the replace ranges. + for (node_range, target) in node_replacements.into_iter() { assert!( !node_range.0.is_empty(), "Cannot replace an empty node range: {:?}", @@ -234,6 +223,8 @@ impl<'a> Parser<'a> { force_collect: ForceCollect, f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>, ) -> PResult<'a, R> { + let possible_capture_mode = self.capture_cfg; + // We must collect if anything could observe the collected tokens, i.e. // if any of the following conditions hold. // - We are force collecting tokens (because force collection requires @@ -244,9 +235,9 @@ impl<'a> Parser<'a> { // - Our target supports custom inner attributes (custom // inner attribute invocation might require token capturing). || R::SUPPORTS_CUSTOM_INNER_ATTRS - // - We are in `capture_cfg` mode (which requires tokens if + // - We are in "possible capture mode" (which requires tokens if // the parsed node has `#[cfg]` or `#[cfg_attr]` attributes). - || self.capture_cfg; + || possible_capture_mode; if !needs_collection { return Ok(f(self, attrs.attrs)?.0); } @@ -267,18 +258,48 @@ impl<'a> Parser<'a> { res? }; - // When we're not in `capture_cfg` mode, then skip collecting and - // return early if either of the following conditions hold. // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`). + // - `Some(None)`: Our target supports tokens and has none. // - `Some(Some(_))`: Our target already has tokens set (e.g. we've - // parsed something like `#[my_attr] $item`). The actual parsing code - // takes care of prepending any attributes to the nonterminal, so we - // don't need to modify the already captured tokens. + // parsed something like `#[my_attr] $item`). + let ret_can_hold_tokens = matches!(ret.tokens_mut(), Some(None)); + + // Ignore any attributes we've previously processed. This happens when + // an inner call to `collect_tokens` returns an AST node and then an + // outer call ends up with the same AST node without any additional + // wrapping layer. + let mut seen_indices = FxHashSet::default(); + for (i, attr) in ret.attrs().iter().enumerate() { + let is_unseen = self.capture_state.seen_attrs.insert(attr.id); + if !is_unseen { + seen_indices.insert(i); + } + } + let ret_attrs: Cow<'_, [Attribute]> = + if seen_indices.is_empty() { + Cow::Borrowed(ret.attrs()) + } else { + let ret_attrs = + ret.attrs() + .iter() + .enumerate() + .filter_map(|(i, attr)| { + if seen_indices.contains(&i) { None } else { Some(attr.clone()) } + }) + .collect(); + Cow::Owned(ret_attrs) + }; + + // When we're not in "definite capture mode", then skip collecting and + // return early if `ret` doesn't support tokens or already has some. // // Note that this check is independent of `force_collect`. There's no // need to collect tokens when we don't support tokens or already have // tokens. - if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) { + let definite_capture_mode = self.capture_cfg + && matches!(self.capture_state.capturing, Capturing::Yes) + && has_cfg_or_cfg_attr(&ret_attrs); + if !definite_capture_mode && !ret_can_hold_tokens { return Ok(ret); } @@ -297,12 +318,12 @@ impl<'a> Parser<'a> { // outer and inner attributes. So this check is more precise than // the earlier `needs_tokens` check, and we don't need to // check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.) - || needs_tokens(ret.attrs()) - // - We are in `capture_cfg` mode and there are `#[cfg]` or - // `#[cfg_attr]` attributes. (During normal non-`capture_cfg` - // parsing, we don't need any special capturing for those - // attributes, because they're builtin.) - || (self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs())); + || needs_tokens(&ret_attrs) + // - We are in "definite capture mode", which requires that there + // are `#[cfg]` or `#[cfg_attr]` attributes. (During normal + // non-`capture_cfg` parsing, we don't need any special capturing + // for those attributes, because they're builtin.) + || definite_capture_mode; if !needs_collection { return Ok(ret); } @@ -318,17 +339,20 @@ impl<'a> Parser<'a> { let parser_replacements_end = self.capture_state.parser_replacements.len(); assert!( - !(self.break_last_token && matches!(capture_trailing, Trailing::Yes)), - "Cannot set break_last_token and have trailing token" + !(self.break_last_token > 0 && matches!(capture_trailing, Trailing::Yes)), + "Cannot have break_last_token > 0 and have trailing token" ); + assert!(self.break_last_token <= 2, "cannot break token more than twice"); let end_pos = self.num_bump_calls + capture_trailing as u32 - // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens), then - // extend the range of captured tokens to include it, since the parser was not actually - // bumped past it. When the `LazyAttrTokenStream` gets converted into an - // `AttrTokenStream`, we will create the proper token. - + self.break_last_token as u32; + // If we "broke" the last token (e.g. breaking a `>>` token once into `>` + `>`, or + // breaking a `>>=` token twice into `>` + `>` + `=`), then extend the range of + // captured tokens to include it, because the parser was not actually bumped past it. + // (Even if we broke twice, it was still just one token originally, hence the `1`.) + // When the `LazyAttrTokenStream` gets converted into an `AttrTokenStream`, we will + // rebreak that final token once or twice. + + if self.break_last_token == 0 { 0 } else { 1 }; let num_calls = end_pos - collect_pos.start_pos; @@ -336,7 +360,7 @@ impl<'a> Parser<'a> { // `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`, // which means the relevant tokens will be removed. (More details below.) let mut inner_attr_parser_replacements = Vec::new(); - for attr in ret.attrs() { + for attr in ret_attrs.iter() { if attr.style == ast::AttrStyle::Inner { if let Some(inner_attr_parser_range) = self.capture_state.inner_attr_parser_ranges.remove(&attr.id) @@ -359,11 +383,10 @@ impl<'a> Parser<'a> { // from `ParserRange` form to `NodeRange` form. We will perform the actual // replacement only when we convert the `LazyAttrTokenStream` to an // `AttrTokenStream`. - self.capture_state.parser_replacements - [parser_replacements_start..parser_replacements_end] - .iter() - .cloned() - .chain(inner_attr_parser_replacements.iter().cloned()) + self.capture_state + .parser_replacements + .drain(parser_replacements_start..parser_replacements_end) + .chain(inner_attr_parser_replacements) .map(|(parser_range, data)| { (NodeRange::new(parser_range, collect_pos.start_pos), data) }) @@ -399,21 +422,13 @@ impl<'a> Parser<'a> { break_last_token: self.break_last_token, node_replacements, }); + let mut tokens_used = false; - // If we support tokens and don't already have them, store the newly captured tokens. - if let Some(target_tokens @ None) = ret.tokens_mut() { - *target_tokens = Some(tokens.clone()); - } - - // If `capture_cfg` is set and we're inside a recursive call to - // `collect_tokens`, then we need to register a replace range if we - // have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager - // cfg-expansion on the captured token stream. - if self.capture_cfg - && matches!(self.capture_state.capturing, Capturing::Yes) - && has_cfg_or_cfg_attr(ret.attrs()) - { - assert!(!self.break_last_token, "Should not have unglued last token with cfg attr"); + // If in "definite capture mode" we need to register a replace range + // for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run + // eager cfg-expansion on the captured token stream. + if definite_capture_mode { + assert!(self.break_last_token == 0, "Should not have unglued last token with cfg attr"); // What is the status here when parsing the example code at the top of this method? // @@ -429,7 +444,9 @@ impl<'a> Parser<'a> { // cfg-expand this AST node. let start_pos = if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos }; - let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens }; + let target = + AttrsTarget { attrs: ret_attrs.iter().cloned().collect(), tokens: tokens.clone() }; + tokens_used = true; self.capture_state .parser_replacements .push((ParserRange(start_pos..end_pos), Some(target))); @@ -438,7 +455,16 @@ impl<'a> Parser<'a> { // the outermost call to this method. self.capture_state.parser_replacements.clear(); self.capture_state.inner_attr_parser_ranges.clear(); + self.capture_state.seen_attrs.clear(); } + + // If we support tokens and don't already have them, store the newly captured tokens. + if let Some(target_tokens @ None) = ret.tokens_mut() { + tokens_used = true; + *target_tokens = Some(tokens); + } + + assert!(tokens_used); // check we didn't create `tokens` unnecessarily Ok(ret) } } @@ -448,7 +474,7 @@ impl<'a> Parser<'a> { /// close delims. fn make_attr_token_stream( iter: impl Iterator<Item = FlatToken>, - break_last_token: bool, + break_last_token: u32, ) -> AttrTokenStream { #[derive(Debug)] struct FrameData { @@ -462,10 +488,10 @@ fn make_attr_token_stream( for flat_token in iter { match flat_token { FlatToken::Token((Token { kind: TokenKind::OpenDelim(delim), span }, spacing)) => { - stack_rest.push(mem::replace( - &mut stack_top, - FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] }, - )); + stack_rest.push(mem::replace(&mut stack_top, FrameData { + open_delim_sp: Some((delim, span, spacing)), + inner: vec![], + })); } FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => { let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap()); @@ -490,18 +516,17 @@ fn make_attr_token_stream( } } - if break_last_token { + if break_last_token > 0 { let last_token = stack_top.inner.pop().unwrap(); if let AttrTokenTree::Token(last_token, spacing) = last_token { - let unglued_first = last_token.kind.break_two_token_op().unwrap().0; + let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap(); - // An 'unglued' token is always two ASCII characters + // Tokens are always ASCII chars, so we can use byte arithmetic here. let mut first_span = last_token.span.shrink_to_lo(); - first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1)); + first_span = + first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token)); - stack_top - .inner - .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing)); + stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing)); } else { panic!("Unexpected last token {last_token:?}") } @@ -510,9 +535,11 @@ fn make_attr_token_stream( } /// Tokens are needed if: -/// - any non-single-segment attributes (other than doc comments) are present; or -/// - any `cfg_attr` attributes are present; -/// - any single-segment, non-builtin attributes are present. +/// - any non-single-segment attributes (other than doc comments) are present, +/// e.g. `rustfmt::skip`; or +/// - any `cfg_attr` attributes are present; or +/// - any single-segment, non-builtin attributes are present, e.g. `derive`, +/// `test`, `global_allocator`. fn needs_tokens(attrs: &[ast::Attribute]) -> bool { attrs.iter().any(|attr| match attr.ident() { None => !attr.is_doc_comment(), diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs index fcdc10c0837..f32307f6ed4 100644 --- a/compiler/rustc_parse/src/parser/diagnostics.rs +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -15,14 +15,15 @@ use rustc_ast::{ use rustc_ast_pretty::pprust; use rustc_data_structures::fx::FxHashSet; use rustc_errors::{ - pluralize, Applicability, Diag, DiagCtxtHandle, ErrorGuaranteed, FatalError, PErr, PResult, - Subdiagnostic, + Applicability, Diag, DiagCtxtHandle, ErrorGuaranteed, FatalError, PErr, PResult, Subdiagnostic, + Suggestions, pluralize, }; use rustc_session::errors::ExprParenthesesNeeded; +use rustc_span::edit_distance::find_best_match_for_name; use rustc_span::source_map::Spanned; -use rustc_span::symbol::{kw, sym, Ident}; -use rustc_span::{BytePos, Span, SpanSnippetError, Symbol, DUMMY_SP}; -use thin_vec::{thin_vec, ThinVec}; +use rustc_span::symbol::{AllKeywords, Ident, kw, sym}; +use rustc_span::{BytePos, DUMMY_SP, Span, SpanSnippetError, Symbol}; +use thin_vec::{ThinVec, thin_vec}; use tracing::{debug, trace}; use super::pat::Expected; @@ -203,6 +204,37 @@ impl std::fmt::Display for UnaryFixity { } } +#[derive(Debug, rustc_macros::Subdiagnostic)] +#[suggestion( + parse_misspelled_kw, + applicability = "machine-applicable", + code = "{similar_kw}", + style = "verbose" +)] +struct MisspelledKw { + similar_kw: String, + #[primary_span] + span: Span, + is_incorrect_case: bool, +} + +/// Checks if the given `lookup` identifier is similar to any keyword symbol in `candidates`. +fn find_similar_kw(lookup: Ident, candidates: &[Symbol]) -> Option<MisspelledKw> { + let lowercase = lookup.name.as_str().to_lowercase(); + let lowercase_sym = Symbol::intern(&lowercase); + if candidates.contains(&lowercase_sym) { + Some(MisspelledKw { similar_kw: lowercase, span: lookup.span, is_incorrect_case: true }) + } else if let Some(similar_sym) = find_best_match_for_name(candidates, lookup.name, None) { + Some(MisspelledKw { + similar_kw: similar_sym.to_string(), + span: lookup.span, + is_incorrect_case: false, + }) + } else { + None + } +} + struct MultiSugg { msg: String, patches: Vec<(Span, String)>, @@ -638,9 +670,9 @@ impl<'a> Parser<'a> { let concat = Symbol::intern(&format!("{prev}{cur}")); let ident = Ident::new(concat, DUMMY_SP); if ident.is_used_keyword() || ident.is_reserved() || ident.is_raw_guess() { - let span = self.prev_token.span.to(self.token.span); + let concat_span = self.prev_token.span.to(self.token.span); err.span_suggestion_verbose( - span, + concat_span, format!("consider removing the space to spell keyword `{concat}`"), concat, Applicability::MachineApplicable, @@ -689,15 +721,12 @@ impl<'a> Parser<'a> { let span = self.token.span.with_lo(pos).with_hi(pos); err.span_suggestion_verbose( span, - format!( - "add a space before {} to write a regular comment", - match (kind, style) { - (token::CommentKind::Line, ast::AttrStyle::Inner) => "`!`", - (token::CommentKind::Block, ast::AttrStyle::Inner) => "`!`", - (token::CommentKind::Line, ast::AttrStyle::Outer) => "the last `/`", - (token::CommentKind::Block, ast::AttrStyle::Outer) => "the last `*`", - }, - ), + format!("add a space before {} to write a regular comment", match (kind, style) { + (token::CommentKind::Line, ast::AttrStyle::Inner) => "`!`", + (token::CommentKind::Block, ast::AttrStyle::Inner) => "`!`", + (token::CommentKind::Line, ast::AttrStyle::Outer) => "the last `/`", + (token::CommentKind::Block, ast::AttrStyle::Outer) => "the last `*`", + },), " ".to_string(), Applicability::MachineApplicable, ); @@ -741,9 +770,61 @@ impl<'a> Parser<'a> { err.span_label(sp, label_exp); err.span_label(self.token.span, "unexpected token"); } + + // Check for misspelled keywords if there are no suggestions added to the diagnostic. + if matches!(&err.suggestions, Suggestions::Enabled(list) if list.is_empty()) { + self.check_for_misspelled_kw(&mut err, &expected); + } Err(err) } + /// Checks if the current token or the previous token are misspelled keywords + /// and adds a helpful suggestion. + fn check_for_misspelled_kw(&self, err: &mut Diag<'_>, expected: &[TokenType]) { + let Some((curr_ident, _)) = self.token.ident() else { + return; + }; + let expected_tokens: &[TokenType] = + expected.len().checked_sub(10).map_or(&expected, |index| &expected[index..]); + let expected_keywords: Vec<Symbol> = expected_tokens + .iter() + .filter_map(|token| if let TokenType::Keyword(kw) = token { Some(*kw) } else { None }) + .collect(); + + // When there are a few keywords in the last ten elements of `self.expected_tokens` and the current + // token is an identifier, it's probably a misspelled keyword. + // This handles code like `async Move {}`, misspelled `if` in match guard, misspelled `else` in `if`-`else` + // and mispelled `where` in a where clause. + if !expected_keywords.is_empty() + && !curr_ident.is_used_keyword() + && let Some(misspelled_kw) = find_similar_kw(curr_ident, &expected_keywords) + { + err.subdiagnostic(misspelled_kw); + // We don't want other suggestions to be added as they are most likely meaningless + // when there is a misspelled keyword. + err.seal_suggestions(); + } else if let Some((prev_ident, _)) = self.prev_token.ident() + && !prev_ident.is_used_keyword() + { + // We generate a list of all keywords at runtime rather than at compile time + // so that it gets generated only when the diagnostic needs it. + // Also, it is unlikely that this list is generated multiple times because the + // parser halts after execution hits this path. + let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition()); + + // Otherwise, check the previous token with all the keywords as possible candidates. + // This handles code like `Struct Human;` and `While a < b {}`. + // We check the previous token only when the current token is an identifier to avoid false + // positives like suggesting keyword `for` for `extern crate foo {}`. + if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) { + err.subdiagnostic(misspelled_kw); + // We don't want other suggestions to be added as they are most likely meaningless + // when there is a misspelled keyword. + err.seal_suggestions(); + } + } + } + /// The user has written `#[attr] expr` which is unsupported. (#106020) pub(super) fn attr_on_non_tail_expr(&self, expr: &Expr) -> ErrorGuaranteed { // Missing semicolon typo error. @@ -846,6 +927,7 @@ impl<'a> Parser<'a> { ); } } + err.emit() } @@ -1851,14 +1933,13 @@ impl<'a> Parser<'a> { (token::Eof, None) => (self.prev_token.span, self.token.span), _ => (self.prev_token.span.shrink_to_hi(), self.token.span), }; - let msg = format!( - "expected `{}`, found {}", - token_str, - match (&self.token.kind, self.subparser_name) { - (token::Eof, Some(origin)) => format!("end of {origin}"), - _ => this_token_str, - }, - ); + let msg = format!("expected `{}`, found {}", token_str, match ( + &self.token.kind, + self.subparser_name + ) { + (token::Eof, Some(origin)) => format!("end of {origin}"), + _ => this_token_str, + },); let mut err = self.dcx().struct_span_err(sp, msg); let label_exp = format!("expected `{token_str}`"); let sm = self.psess.source_map(); @@ -2474,7 +2555,7 @@ impl<'a> Parser<'a> { err.delay_as_bug(); } } - return Ok(false); // Don't continue. + Ok(false) // Don't continue. } /// Attempt to parse a generic const argument that has not been enclosed in braces. @@ -2779,27 +2860,25 @@ impl<'a> Parser<'a> { PatKind::Ident(BindingMode::NONE, ident, None) => { match &first_pat.kind { PatKind::Ident(_, old_ident, _) => { - let path = PatKind::Path( - None, - Path { - span: new_span, - segments: thin_vec![ - PathSegment::from_ident(*old_ident), - PathSegment::from_ident(*ident), - ], - tokens: None, - }, - ); + let path = PatKind::Path(None, Path { + span: new_span, + segments: thin_vec![ + PathSegment::from_ident(*old_ident), + PathSegment::from_ident(*ident), + ], + tokens: None, + }); first_pat = self.mk_pat(new_span, path); show_sugg = true; } PatKind::Path(old_qself, old_path) => { let mut segments = old_path.segments.clone(); segments.push(PathSegment::from_ident(*ident)); - let path = PatKind::Path( - old_qself.clone(), - Path { span: new_span, segments, tokens: None }, - ); + let path = PatKind::Path(old_qself.clone(), Path { + span: new_span, + segments, + tokens: None, + }); first_pat = self.mk_pat(new_span, path); show_sugg = true; } diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 84684e808d9..0ac6133e828 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -10,25 +10,25 @@ use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, Token, TokenKind}; use rustc_ast::util::case::Case; use rustc_ast::util::classify; -use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity}; -use rustc_ast::visit::{walk_expr, Visitor}; +use rustc_ast::util::parser::{AssocOp, Fixity, prec_let_scrutinee_needs_par}; +use rustc_ast::visit::{Visitor, walk_expr}; use rustc_ast::{ self as ast, AnonConst, Arm, AttrStyle, AttrVec, BinOp, BinOpKind, BlockCheckMode, CaptureBy, - ClosureBinder, Expr, ExprField, ExprKind, FnDecl, FnRetTy, Label, MacCall, MetaItemLit, - Movability, Param, RangeLimits, StmtKind, Ty, TyKind, UnOp, DUMMY_NODE_ID, + ClosureBinder, DUMMY_NODE_ID, Expr, ExprField, ExprKind, FnDecl, FnRetTy, Label, MacCall, + MetaItemLit, Movability, Param, RangeLimits, StmtKind, Ty, TyKind, UnOp, }; use rustc_ast_pretty::pprust; use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_errors::{Applicability, Diag, PResult, StashKey, Subdiagnostic}; use rustc_lexer::unescape::unescape_char; use rustc_macros::Subdiagnostic; -use rustc_session::errors::{report_lit_error, ExprParenthesesNeeded}; -use rustc_session::lint::builtin::BREAK_WITH_LABEL_AND_LOOP; +use rustc_session::errors::{ExprParenthesesNeeded, report_lit_error}; use rustc_session::lint::BuiltinLintDiag; +use rustc_session::lint::builtin::BREAK_WITH_LABEL_AND_LOOP; use rustc_span::source_map::{self, Spanned}; -use rustc_span::symbol::{kw, sym, Ident, Symbol}; +use rustc_span::symbol::{Ident, Symbol, kw, sym}; use rustc_span::{BytePos, ErrorGuaranteed, Pos, Span}; -use thin_vec::{thin_vec, ThinVec}; +use thin_vec::{ThinVec, thin_vec}; use tracing::instrument; use super::diagnostics::SnapshotParser; @@ -41,7 +41,7 @@ use super::{ use crate::{errors, maybe_recover_from_interpolated_ty_qpath}; #[derive(Debug)] -enum DestructuredFloat { +pub(super) enum DestructuredFloat { /// 1e2 Single(Symbol, Span), /// 1. @@ -49,7 +49,7 @@ enum DestructuredFloat { /// 1.2 | 1.2e3 MiddleDot(Symbol, Span, Span, Symbol, Span), /// Invalid - Error, + Error(ErrorGuaranteed), } impl<'a> Parser<'a> { @@ -811,20 +811,17 @@ impl<'a> Parser<'a> { // Check if an illegal postfix operator has been added after the cast. // If the resulting expression is not a cast, it is an illegal postfix operator. if !matches!(with_postfix.kind, ExprKind::Cast(_, _)) { - let msg = format!( - "cast cannot be followed by {}", - match with_postfix.kind { - ExprKind::Index(..) => "indexing", - ExprKind::Try(_) => "`?`", - ExprKind::Field(_, _) => "a field access", - ExprKind::MethodCall(_) => "a method call", - ExprKind::Call(_, _) => "a function call", - ExprKind::Await(_, _) => "`.await`", - ExprKind::Match(_, _, MatchKind::Postfix) => "a postfix match", - ExprKind::Err(_) => return Ok(with_postfix), - _ => unreachable!("parse_dot_or_call_expr_with_ shouldn't produce this"), - } - ); + let msg = format!("cast cannot be followed by {}", match with_postfix.kind { + ExprKind::Index(..) => "indexing", + ExprKind::Try(_) => "`?`", + ExprKind::Field(_, _) => "a field access", + ExprKind::MethodCall(_) => "a method call", + ExprKind::Call(_, _) => "a function call", + ExprKind::Await(_, _) => "`.await`", + ExprKind::Match(_, _, MatchKind::Postfix) => "a postfix match", + ExprKind::Err(_) => return Ok(with_postfix), + _ => unreachable!("parse_dot_or_call_expr_with_ shouldn't produce this"), + }); let mut err = self.dcx().struct_span_err(span, msg); let suggest_parens = |err: &mut Diag<'_>| { @@ -1008,7 +1005,7 @@ impl<'a> Parser<'a> { self.mk_expr_tuple_field_access(lo, ident1_span, base, sym1, None); self.mk_expr_tuple_field_access(lo, ident2_span, base1, sym2, suffix) } - DestructuredFloat::Error => base, + DestructuredFloat::Error(_) => base, }) } _ => { @@ -1018,7 +1015,7 @@ impl<'a> Parser<'a> { } } - fn error_unexpected_after_dot(&self) { + fn error_unexpected_after_dot(&self) -> ErrorGuaranteed { let actual = pprust::token_to_string(&self.token); let span = self.token.span; let sm = self.psess.source_map(); @@ -1028,18 +1025,20 @@ impl<'a> Parser<'a> { } _ => (span, actual), }; - self.dcx().emit_err(errors::UnexpectedTokenAfterDot { span, actual }); + self.dcx().emit_err(errors::UnexpectedTokenAfterDot { span, actual }) } - // We need an identifier or integer, but the next token is a float. - // Break the float into components to extract the identifier or integer. + /// We need an identifier or integer, but the next token is a float. + /// Break the float into components to extract the identifier or integer. + /// + /// See also [`TokenKind::break_two_token_op`] which does similar splitting of `>>` into `>`. + // // FIXME: With current `TokenCursor` it's hard to break tokens into more than 2 - // parts unless those parts are processed immediately. `TokenCursor` should either - // support pushing "future tokens" (would be also helpful to `break_and_eat`), or - // we should break everything including floats into more basic proc-macro style - // tokens in the lexer (probably preferable). - // See also `TokenKind::break_two_token_op` which does similar splitting of `>>` into `>`. - fn break_up_float(&self, float: Symbol, span: Span) -> DestructuredFloat { + // parts unless those parts are processed immediately. `TokenCursor` should either + // support pushing "future tokens" (would be also helpful to `break_and_eat`), or + // we should break everything including floats into more basic proc-macro style + // tokens in the lexer (probably preferable). + pub(super) fn break_up_float(&self, float: Symbol, span: Span) -> DestructuredFloat { #[derive(Debug)] enum FloatComponent { IdentLike(String), @@ -1078,34 +1077,30 @@ impl<'a> Parser<'a> { DestructuredFloat::Single(Symbol::intern(i), span) } // 1. - [IdentLike(i), Punct('.')] => { - let (ident_span, dot_span) = if can_take_span_apart() { - let (span, ident_len) = (span.data(), BytePos::from_usize(i.len())); - let ident_span = span.with_hi(span.lo + ident_len); - let dot_span = span.with_lo(span.lo + ident_len); - (ident_span, dot_span) + [IdentLike(left), Punct('.')] => { + let (left_span, dot_span) = if can_take_span_apart() { + let left_span = span.with_hi(span.lo() + BytePos::from_usize(left.len())); + let dot_span = span.with_lo(left_span.hi()); + (left_span, dot_span) } else { (span, span) }; - let symbol = Symbol::intern(i); - DestructuredFloat::TrailingDot(symbol, ident_span, dot_span) + let left = Symbol::intern(left); + DestructuredFloat::TrailingDot(left, left_span, dot_span) } // 1.2 | 1.2e3 - [IdentLike(i1), Punct('.'), IdentLike(i2)] => { - let (ident1_span, dot_span, ident2_span) = if can_take_span_apart() { - let (span, ident1_len) = (span.data(), BytePos::from_usize(i1.len())); - let ident1_span = span.with_hi(span.lo + ident1_len); - let dot_span = span - .with_lo(span.lo + ident1_len) - .with_hi(span.lo + ident1_len + BytePos(1)); - let ident2_span = self.token.span.with_lo(span.lo + ident1_len + BytePos(1)); - (ident1_span, dot_span, ident2_span) + [IdentLike(left), Punct('.'), IdentLike(right)] => { + let (left_span, dot_span, right_span) = if can_take_span_apart() { + let left_span = span.with_hi(span.lo() + BytePos::from_usize(left.len())); + let dot_span = span.with_lo(left_span.hi()).with_hi(left_span.hi() + BytePos(1)); + let right_span = span.with_lo(dot_span.hi()); + (left_span, dot_span, right_span) } else { (span, span, span) }; - let symbol1 = Symbol::intern(i1); - let symbol2 = Symbol::intern(i2); - DestructuredFloat::MiddleDot(symbol1, ident1_span, dot_span, symbol2, ident2_span) + let left = Symbol::intern(left); + let right = Symbol::intern(right); + DestructuredFloat::MiddleDot(left, left_span, dot_span, right, right_span) } // 1e+ | 1e- (recovered) [IdentLike(_), Punct('+' | '-')] | @@ -1116,8 +1111,8 @@ impl<'a> Parser<'a> { // 1.2e+3 | 1.2e-3 [IdentLike(_), Punct('.'), IdentLike(_), Punct('+' | '-'), IdentLike(_)] => { // See the FIXME about `TokenCursor` above. - self.error_unexpected_after_dot(); - DestructuredFloat::Error + let guar = self.error_unexpected_after_dot(); + DestructuredFloat::Error(guar) } _ => panic!("unexpected components in a float token: {components:?}"), } @@ -1183,7 +1178,7 @@ impl<'a> Parser<'a> { fields.insert(start_idx, Ident::new(symbol2, span2)); fields.insert(start_idx, Ident::new(symbol1, span1)); } - DestructuredFloat::Error => { + DestructuredFloat::Error(_) => { trailing_dot = None; fields.insert(start_idx, Ident::new(symbol, self.prev_token.span)); } @@ -2050,7 +2045,7 @@ impl<'a> Parser<'a> { }; // On an error path, eagerly consider a lifetime to be an unclosed character lit, if that // makes sense. - if let Some(ident) = self.token.lifetime() + if let Some((ident, IdentIsRaw::No)) = self.token.lifetime() && could_be_unclosed_char_literal(ident) { let lt = self.expect_lifetime(); @@ -2554,13 +2549,12 @@ impl<'a> Parser<'a> { let maybe_fatarrow = self.token.clone(); let block = if self.check(&token::OpenDelim(Delimiter::Brace)) { self.parse_block()? + } else if let Some(block) = recover_block_from_condition(self) { + block } else { - if let Some(block) = recover_block_from_condition(self) { - block - } else { - self.error_on_extra_if(&cond)?; - // Parse block, which will always fail, but we can add a nice note to the error - self.parse_block().map_err(|mut err| { + self.error_on_extra_if(&cond)?; + // Parse block, which will always fail, but we can add a nice note to the error + self.parse_block().map_err(|mut err| { if self.prev_token == token::Semi && self.token == token::AndAnd && let maybe_let = self.look_ahead(1, |t| t.clone()) @@ -2592,7 +2586,6 @@ impl<'a> Parser<'a> { } err })? - } }; self.error_on_if_block_attrs(lo, false, block.span, attrs); block @@ -2848,10 +2841,13 @@ impl<'a> Parser<'a> { .emit_err(errors::MissingExpressionInForLoop { span: expr.span.shrink_to_lo() }); let err_expr = self.mk_expr(expr.span, ExprKind::Err(guar)); let block = self.mk_block(thin_vec![], BlockCheckMode::Default, self.prev_token.span); - return Ok(self.mk_expr( - lo.to(self.prev_token.span), - ExprKind::ForLoop { pat, iter: err_expr, body: block, label: opt_label, kind }, - )); + return Ok(self.mk_expr(lo.to(self.prev_token.span), ExprKind::ForLoop { + pat, + iter: err_expr, + body: block, + label: opt_label, + kind, + })); } let (attrs, loop_block) = self.parse_inner_attrs_and_block()?; @@ -2925,9 +2921,9 @@ impl<'a> Parser<'a> { } pub(crate) fn eat_label(&mut self) -> Option<Label> { - if let Some(ident) = self.token.lifetime() { + if let Some((ident, is_raw)) = self.token.lifetime() { // Disallow `'fn`, but with a better error message than `expect_lifetime`. - if ident.without_first_quote().is_reserved() { + if matches!(is_raw, IdentIsRaw::No) && ident.without_first_quote().is_reserved() { self.dcx().emit_err(errors::InvalidLabel { span: ident.span, name: ident.name }); } diff --git a/compiler/rustc_parse/src/parser/generics.rs b/compiler/rustc_parse/src/parser/generics.rs index af3b6f740e3..b9256daa725 100644 --- a/compiler/rustc_parse/src/parser/generics.rs +++ b/compiler/rustc_parse/src/parser/generics.rs @@ -1,10 +1,10 @@ use ast::token::Delimiter; use rustc_ast::{ - self as ast, token, AttrVec, GenericBounds, GenericParam, GenericParamKind, TyKind, WhereClause, + self as ast, AttrVec, GenericBounds, GenericParam, GenericParamKind, TyKind, WhereClause, token, }; use rustc_errors::{Applicability, PResult}; -use rustc_span::symbol::{kw, Ident}; use rustc_span::Span; +use rustc_span::symbol::{Ident, kw}; use thin_vec::ThinVec; use super::{ForceCollect, Parser, Trailing, UsePreAttrPos}; diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs index 14da6c331f1..9fc82d84225 100644 --- a/compiler/rustc_parse/src/parser/item.rs +++ b/compiler/rustc_parse/src/parser/item.rs @@ -10,15 +10,15 @@ use rustc_ast::util::case::Case; use rustc_ast::{self as ast}; use rustc_ast_pretty::pprust; use rustc_errors::codes::*; -use rustc_errors::{struct_span_code_err, Applicability, PResult, StashKey}; +use rustc_errors::{Applicability, PResult, StashKey, struct_span_code_err}; use rustc_span::edit_distance::edit_distance; use rustc_span::edition::Edition; -use rustc_span::symbol::{kw, sym, Ident, Symbol}; -use rustc_span::{source_map, ErrorGuaranteed, Span, DUMMY_SP}; -use thin_vec::{thin_vec, ThinVec}; +use rustc_span::symbol::{Ident, Symbol, kw, sym}; +use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span, source_map}; +use thin_vec::{ThinVec, thin_vec}; use tracing::debug; -use super::diagnostics::{dummy_arg, ConsumeClosingDelim}; +use super::diagnostics::{ConsumeClosingDelim, dummy_arg}; use super::ty::{AllowPlus, RecoverQPath, RecoverReturnSign}; use super::{ AttrWrapper, FollowedByType, ForceCollect, Parser, PathStyle, Trailing, UsePreAttrPos, @@ -51,7 +51,9 @@ impl<'a> Parser<'a> { } /// Parses the contents of a module (inner attributes followed by module items). - /// We exit once we hit `term` + /// We exit once we hit `term` which can be either + /// - EOF (for files) + /// - `}` for mod items pub fn parse_mod( &mut self, term: &TokenKind, @@ -455,7 +457,7 @@ impl<'a> Parser<'a> { fn parse_item_builtin(&mut self) -> PResult<'a, Option<ItemInfo>> { // To be expanded - return Ok(None); + Ok(None) } /// Parses an item macro, e.g., `item!();`. @@ -705,7 +707,7 @@ impl<'a> Parser<'a> { }) }; - let (ident, item_kind) = if self.eat(&token::PathSep) { + let (ident, item_kind) = if self.eat_path_sep() { let suffixes = if self.eat(&token::BinOp(token::Star)) { None } else { @@ -1052,7 +1054,7 @@ impl<'a> Parser<'a> { { // `use *;` or `use ::*;` or `use {...};` or `use ::{...};` let mod_sep_ctxt = self.token.span.ctxt(); - if self.eat(&token::PathSep) { + if self.eat_path_sep() { prefix .segments .push(PathSegment::path_root(lo.shrink_to_lo().with_ctxt(mod_sep_ctxt))); @@ -1063,7 +1065,7 @@ impl<'a> Parser<'a> { // `use path::*;` or `use path::{...};` or `use path;` or `use path as bar;` prefix = self.parse_path(PathStyle::Mod)?; - if self.eat(&token::PathSep) { + if self.eat_path_sep() { self.parse_use_tree_glob_or_nested()? } else { // Recover from using a colon as path separator. @@ -1586,7 +1588,7 @@ impl<'a> Parser<'a> { (thin_vec![], Recovered::Yes(guar)) } }; - VariantData::Struct { fields, recovered: recovered.into() } + VariantData::Struct { fields, recovered } } else if this.check(&token::OpenDelim(Delimiter::Parenthesis)) { let body = match this.parse_tuple_struct_body() { Ok(body) => body, @@ -1670,7 +1672,7 @@ impl<'a> Parser<'a> { class_name.span, generics.where_clause.has_where_token, )?; - VariantData::Struct { fields, recovered: recovered.into() } + VariantData::Struct { fields, recovered } } // No `where` so: `struct Foo<T>;` } else if self.eat(&token::Semi) { @@ -1682,7 +1684,7 @@ impl<'a> Parser<'a> { class_name.span, generics.where_clause.has_where_token, )?; - VariantData::Struct { fields, recovered: recovered.into() } + VariantData::Struct { fields, recovered } // Tuple-style struct definition with optional where-clause. } else if self.token == token::OpenDelim(Delimiter::Parenthesis) { let body = VariantData::Tuple(self.parse_tuple_struct_body()?, DUMMY_NODE_ID); @@ -1711,14 +1713,14 @@ impl<'a> Parser<'a> { class_name.span, generics.where_clause.has_where_token, )?; - VariantData::Struct { fields, recovered: recovered.into() } + VariantData::Struct { fields, recovered } } else if self.token == token::OpenDelim(Delimiter::Brace) { let (fields, recovered) = self.parse_record_struct_body( "union", class_name.span, generics.where_clause.has_where_token, )?; - VariantData::Struct { fields, recovered: recovered.into() } + VariantData::Struct { fields, recovered } } else { let token_str = super::token_descr(&self.token); let msg = format!("expected `where` or `{{` after union name, found {token_str}"); @@ -1895,10 +1897,10 @@ impl<'a> Parser<'a> { // Try to recover extra trailing angle brackets if let TyKind::Path(_, Path { segments, .. }) = &a_var.ty.kind { if let Some(last_segment) = segments.last() { - let guar = self.check_trailing_angle_brackets( - last_segment, - &[&token::Comma, &token::CloseDelim(Delimiter::Brace)], - ); + let guar = self.check_trailing_angle_brackets(last_segment, &[ + &token::Comma, + &token::CloseDelim(Delimiter::Brace), + ]); if let Some(_guar) = guar { // Handle a case like `Vec<u8>>,` where we can continue parsing fields // after the comma diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 61e3fa2e6c5..77ad4fdeeb1 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -27,17 +27,18 @@ use rustc_ast::tokenstream::{ }; use rustc_ast::util::case::Case; use rustc_ast::{ - self as ast, AnonConst, AttrArgs, AttrArgsEq, AttrId, ByRef, Const, CoroutineKind, DelimArgs, - Expr, ExprKind, Extern, HasAttrs, HasTokens, Mutability, Recovered, Safety, StrLit, Visibility, - VisibilityKind, DUMMY_NODE_ID, + self as ast, AnonConst, AttrArgs, AttrArgsEq, AttrId, ByRef, Const, CoroutineKind, + DUMMY_NODE_ID, DelimArgs, Expr, ExprKind, Extern, HasAttrs, HasTokens, Mutability, Recovered, + Safety, StrLit, Visibility, VisibilityKind, }; use rustc_ast_pretty::pprust; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::Lrc; use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult}; +use rustc_index::interval::IntervalSet; use rustc_session::parse::ParseSess; -use rustc_span::symbol::{kw, sym, Ident, Symbol}; -use rustc_span::{Span, DUMMY_SP}; +use rustc_span::symbol::{Ident, Symbol, kw, sym}; +use rustc_span::{DUMMY_SP, Span}; use thin_vec::ThinVec; use tracing::debug; @@ -145,21 +146,25 @@ pub struct Parser<'a> { token_cursor: TokenCursor, // The number of calls to `bump`, i.e. the position in the token stream. num_bump_calls: u32, - // During parsing we may sometimes need to 'unglue' a glued token into two - // component tokens (e.g. '>>' into '>' and '>), so the parser can consume - // them one at a time. This process bypasses the normal capturing mechanism - // (e.g. `num_bump_calls` will not be incremented), since the 'unglued' - // tokens due not exist in the original `TokenStream`. + // During parsing we may sometimes need to "unglue" a glued token into two + // or three component tokens (e.g. `>>` into `>` and `>`, or `>>=` into `>` + // and `>` and `=`), so the parser can consume them one at a time. This + // process bypasses the normal capturing mechanism (e.g. `num_bump_calls` + // will not be incremented), since the "unglued" tokens due not exist in + // the original `TokenStream`. // - // If we end up consuming both unglued tokens, this is not an issue. We'll - // end up capturing the single 'glued' token. + // If we end up consuming all the component tokens, this is not an issue, + // because we'll end up capturing the single "glued" token. // - // However, sometimes we may want to capture just the first 'unglued' + // However, sometimes we may want to capture not all of the original // token. For example, capturing the `Vec<u8>` in `Option<Vec<u8>>` // requires us to unglue the trailing `>>` token. The `break_last_token` - // field is used to track this token. It gets appended to the captured + // field is used to track these tokens. They get appended to the captured // stream when we evaluate a `LazyAttrTokenStream`. - break_last_token: bool, + // + // This value is always 0, 1, or 2. It can only reach 2 when splitting + // `>>=` or `<<=`. + break_last_token: u32, /// This field is used to keep track of how many left angle brackets we have seen. This is /// required in order to detect extra leading left angle brackets (`<` characters) and error /// appropriately. @@ -183,7 +188,7 @@ pub struct Parser<'a> { // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure // it doesn't unintentionally get bigger. #[cfg(target_pointer_width = "64")] -rustc_data_structures::static_assert_size!(Parser<'_>, 256); +rustc_data_structures::static_assert_size!(Parser<'_>, 288); /// Stores span information about a closure. #[derive(Clone, Debug)] @@ -238,7 +243,8 @@ impl NodeRange { // is the position of the function's start token. This gives // `NodeRange(10..15)`. fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange { - assert!(parser_range.start >= start_pos && parser_range.end >= start_pos); + assert!(!parser_range.is_empty()); + assert!(parser_range.start >= start_pos); NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos)) } } @@ -260,6 +266,9 @@ struct CaptureState { capturing: Capturing, parser_replacements: Vec<ParserReplacement>, inner_attr_parser_ranges: FxHashMap<AttrId, ParserRange>, + // `IntervalSet` is good for perf because attrs are mostly added to this + // set in contiguous ranges. + seen_attrs: IntervalSet<AttrId>, } /// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that @@ -448,7 +457,7 @@ impl<'a> Parser<'a> { expected_tokens: Vec::new(), token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() }, num_bump_calls: 0, - break_last_token: false, + break_last_token: 0, unmatched_angle_bracket_count: 0, angle_bracket_nesting: 0, last_unexpected_token_span: None, @@ -457,6 +466,7 @@ impl<'a> Parser<'a> { capturing: Capturing::No, parser_replacements: Vec::new(), inner_attr_parser_ranges: Default::default(), + seen_attrs: IntervalSet::new(u32::MAX as usize), }, current_closure: None, recovery: Recovery::Allowed, @@ -767,7 +777,7 @@ impl<'a> Parser<'a> { self.bump(); return true; } - match self.token.kind.break_two_token_op() { + match self.token.kind.break_two_token_op(1) { Some((first, second)) if first == expected => { let first_span = self.psess.source_map().start_point(self.token.span); let second_span = self.token.span.with_lo(first_span.hi()); @@ -777,8 +787,8 @@ impl<'a> Parser<'a> { // // If we consume any additional tokens, then this token // is not needed (we'll capture the entire 'glued' token), - // and `bump` will set this field to `None` - self.break_last_token = true; + // and `bump` will set this field to 0. + self.break_last_token += 1; // Use the spacing of the glued token as the spacing of the // unglued second token. self.bump_with((Token::new(second, second_span), self.token_spacing)); @@ -1142,10 +1152,9 @@ impl<'a> Parser<'a> { // than `.0`/`.1` access. let mut next = self.token_cursor.inlined_next(); self.num_bump_calls += 1; - // We've retrieved an token from the underlying - // cursor, so we no longer need to worry about - // an unglued token. See `break_and_eat` for more details - self.break_last_token = false; + // We got a token from the underlying cursor and no longer need to + // worry about an unglued token. See `break_and_eat` for more details. + self.break_last_token = 0; if next.0.span.is_dummy() { // Tweak the location for better diagnostics, but keep syntactic context intact. let fallback_span = self.token.span; @@ -1353,13 +1362,11 @@ impl<'a> Parser<'a> { fn parse_attr_args(&mut self) -> PResult<'a, AttrArgs> { Ok(if let Some(args) = self.parse_delim_args_inner() { AttrArgs::Delimited(args) + } else if self.eat(&token::Eq) { + let eq_span = self.prev_token.span; + AttrArgs::Eq(eq_span, AttrArgsEq::Ast(self.parse_expr_force_collect()?)) } else { - if self.eat(&token::Eq) { - let eq_span = self.prev_token.span; - AttrArgs::Eq(eq_span, AttrArgsEq::Ast(self.parse_expr_force_collect()?)) - } else { - AttrArgs::Empty - } + AttrArgs::Empty }) } @@ -1558,12 +1565,25 @@ impl<'a> Parser<'a> { }) } + /// Checks for `::` or, potentially, `:::` and then look ahead after it. + fn check_path_sep_and_look_ahead(&mut self, looker: impl Fn(&Token) -> bool) -> bool { + if self.check(&token::PathSep) { + if self.may_recover() && self.look_ahead(1, |t| t.kind == token::Colon) { + debug_assert!(!self.look_ahead(1, &looker), "Looker must not match on colon"); + self.look_ahead(2, looker) + } else { + self.look_ahead(1, looker) + } + } else { + false + } + } + /// `::{` or `::*` fn is_import_coupler(&mut self) -> bool { - self.check(&token::PathSep) - && self.look_ahead(1, |t| { - *t == token::OpenDelim(Delimiter::Brace) || *t == token::BinOp(token::Star) - }) + self.check_path_sep_and_look_ahead(|t| { + matches!(t.kind, token::OpenDelim(Delimiter::Brace) | token::BinOp(token::Star)) + }) } // Debug view of the parser's token stream, up to `{lookahead}` tokens. @@ -1666,7 +1686,7 @@ enum FlatToken { pub enum ParseNtResult { Tt(TokenTree), Ident(Ident, IdentIsRaw), - Lifetime(Ident), + Lifetime(Ident, IdentIsRaw), /// This case will eventually be removed, along with `Token::Interpolate`. Nt(Lrc<Nonterminal>), diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index 999f6f0eeb0..43c3de90d9d 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -1,13 +1,13 @@ +use rustc_ast::HasTokens; use rustc_ast::ptr::P; use rustc_ast::token::Nonterminal::*; use rustc_ast::token::NtExprKind::*; use rustc_ast::token::NtPatKind::*; use rustc_ast::token::{self, Delimiter, NonterminalKind, Token}; -use rustc_ast::HasTokens; use rustc_ast_pretty::pprust; use rustc_data_structures::sync::Lrc; use rustc_errors::PResult; -use rustc_span::symbol::{kw, Ident}; +use rustc_span::symbol::{Ident, kw}; use crate::errors::UnexpectedNonterminal; use crate::parser::pat::{CommaRecoveryMode, RecoverColon, RecoverComma}; @@ -86,27 +86,9 @@ impl<'a> Parser<'a> { token::Interpolated(nt) => may_be_ident(nt), _ => false, }, - NonterminalKind::Pat(pat_kind) => match &token.kind { - // box, ref, mut, and other identifiers (can stricten) - token::Ident(..) | token::NtIdent(..) | - token::OpenDelim(Delimiter::Parenthesis) | // tuple pattern - token::OpenDelim(Delimiter::Bracket) | // slice pattern - token::BinOp(token::And) | // reference - token::BinOp(token::Minus) | // negative literal - token::AndAnd | // double reference - token::Literal(_) | // literal - token::DotDot | // range pattern (future compat) - token::DotDotDot | // range pattern (future compat) - token::PathSep | // path - token::Lt | // path (UFCS constant) - token::BinOp(token::Shl) => true, // path (double UFCS) - // leading vert `|` or-pattern - token::BinOp(token::Or) => matches!(pat_kind, PatWithOr), - token::Interpolated(nt) => may_be_ident(nt), - _ => false, - }, + NonterminalKind::Pat(pat_kind) => token.can_begin_pattern(pat_kind), NonterminalKind::Lifetime => match &token.kind { - token::Lifetime(_) | token::NtLifetime(..) => true, + token::Lifetime(..) | token::NtLifetime(..) => true, _ => false, }, NonterminalKind::TT | NonterminalKind::Item | NonterminalKind::Stmt => { @@ -189,9 +171,9 @@ impl<'a> Parser<'a> { NonterminalKind::Lifetime => { // We want to keep `'keyword` parsing, just like `keyword` is still // an ident for nonterminal purposes. - return if let Some(ident) = self.token.lifetime() { + return if let Some((ident, is_raw)) = self.token.lifetime() { self.bump(); - Ok(ParseNtResult::Lifetime(ident)) + Ok(ParseNtResult::Lifetime(ident, is_raw)) } else { Err(self.dcx().create_err(UnexpectedNonterminal::Lifetime { span: self.token.span, diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index cc68ae237ba..bd2ffaa0a89 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -1,17 +1,19 @@ -use rustc_ast::mut_visit::{walk_pat, MutVisitor}; +use rustc_ast::mut_visit::{self, MutVisitor}; use rustc_ast::ptr::P; -use rustc_ast::token::{self, BinOpToken, Delimiter, Token}; +use rustc_ast::token::{self, BinOpToken, Delimiter, IdentIsRaw, Token}; +use rustc_ast::visit::{self, Visitor}; use rustc_ast::{ - self as ast, AttrVec, BindingMode, ByRef, Expr, ExprKind, MacCall, Mutability, Pat, PatField, - PatFieldsRest, PatKind, Path, QSelf, RangeEnd, RangeSyntax, + self as ast, Arm, AttrVec, BinOpKind, BindingMode, ByRef, Expr, ExprKind, ExprPrecedence, + LocalKind, MacCall, Mutability, Pat, PatField, PatFieldsRest, PatKind, Path, QSelf, RangeEnd, + RangeSyntax, Stmt, StmtKind, }; use rustc_ast_pretty::pprust; -use rustc_errors::{Applicability, Diag, PResult}; +use rustc_errors::{Applicability, Diag, DiagArgValue, PResult, StashKey}; use rustc_session::errors::ExprParenthesesNeeded; -use rustc_span::source_map::{respan, Spanned}; -use rustc_span::symbol::{kw, sym, Ident}; +use rustc_span::source_map::{Spanned, respan}; +use rustc_span::symbol::{Ident, kw, sym}; use rustc_span::{BytePos, ErrorGuaranteed, Span}; -use thin_vec::{thin_vec, ThinVec}; +use thin_vec::{ThinVec, thin_vec}; use super::{ForceCollect, Parser, PathStyle, Restrictions, Trailing, UsePreAttrPos}; use crate::errors::{ @@ -21,11 +23,11 @@ use crate::errors::{ InclusiveRangeExtraEquals, InclusiveRangeMatchArrow, InclusiveRangeNoEnd, InvalidMutInPattern, ParenRangeSuggestion, PatternOnWrongSideOfAt, RemoveLet, RepeatedMutInPattern, SwitchRefBoxOrder, TopLevelOrPatternNotAllowed, TopLevelOrPatternNotAllowedSugg, - TrailingVertNotAllowed, UnexpectedExpressionInPattern, UnexpectedLifetimeInPattern, - UnexpectedParenInRangePat, UnexpectedParenInRangePatSugg, + TrailingVertNotAllowed, UnexpectedExpressionInPattern, UnexpectedExpressionInPatternSugg, + UnexpectedLifetimeInPattern, UnexpectedParenInRangePat, UnexpectedParenInRangePatSugg, UnexpectedVertVertBeforeFunctionParam, UnexpectedVertVertInPattern, WrapInParens, }; -use crate::parser::expr::could_be_unclosed_char_literal; +use crate::parser::expr::{DestructuredFloat, could_be_unclosed_char_literal}; use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole}; #[derive(PartialEq, Copy, Clone)] @@ -342,7 +344,7 @@ impl<'a> Parser<'a> { } } - /// Ensures that the last parsed pattern (or pattern range bound) is not followed by a method call or an operator. + /// Ensures that the last parsed pattern (or pattern range bound) is not followed by an expression. /// /// `is_end_bound` indicates whether the last parsed thing was the end bound of a range pattern (see [`parse_pat_range_end`](Self::parse_pat_range_end)) /// in order to say "expected a pattern range bound" instead of "expected a pattern"; @@ -350,38 +352,64 @@ impl<'a> Parser<'a> { /// 0..=1 + 2 /// ^^^^^ /// ``` - /// Only the end bound is spanned, and this function have no idea if there were a `..=` before `pat_span`, hence the parameter. + /// Only the end bound is spanned in this case, and this function has no idea if there was a `..=` before `pat_span`, hence the parameter. + /// + /// This function returns `Some` if a trailing expression was recovered, and said expression's span. #[must_use = "the pattern must be discarded as `PatKind::Err` if this function returns Some"] fn maybe_recover_trailing_expr( &mut self, pat_span: Span, is_end_bound: bool, - ) -> Option<ErrorGuaranteed> { + ) -> Option<(ErrorGuaranteed, Span)> { if self.prev_token.is_keyword(kw::Underscore) || !self.may_recover() { // Don't recover anything after an `_` or if recovery is disabled. return None; } - // Check for `.hello()`, but allow `.Hello()` to be recovered as `, Hello()` in `parse_seq_to_before_tokens()`. - let has_trailing_method = self.check_noexpect(&token::Dot) + // Returns `true` iff `token` is an unsuffixed integer. + let is_one_tuple_index = |_: &Self, token: &Token| -> bool { + use token::{Lit, LitKind}; + + matches!( + token.kind, + token::Literal(Lit { kind: LitKind::Integer, symbol: _, suffix: None }) + ) + }; + + // Returns `true` iff `token` is an unsuffixed `x.y` float. + let is_two_tuple_indexes = |this: &Self, token: &Token| -> bool { + use token::{Lit, LitKind}; + + if let token::Literal(Lit { kind: LitKind::Float, symbol, suffix: None }) = token.kind + && let DestructuredFloat::MiddleDot(..) = this.break_up_float(symbol, token.span) + { + true + } else { + false + } + }; + + // Check for `.hello` or `.0`. + let has_dot_expr = self.check_noexpect(&token::Dot) // `.` && self.look_ahead(1, |tok| { - tok.ident() - .and_then(|(ident, _)| ident.name.as_str().chars().next()) - .is_some_and(char::is_lowercase) - }) - && self.look_ahead(2, |t| *t == token::OpenDelim(Delimiter::Parenthesis)); + tok.is_ident() // `hello` + || is_one_tuple_index(&self, &tok) // `0` + || is_two_tuple_indexes(&self, &tok) // `0.0` + }); // Check for operators. // `|` is excluded as it is used in pattern alternatives and lambdas, // `?` is included for error propagation, // `[` is included for indexing operations, - // `[]` is excluded as `a[]` isn't an expression and should be recovered as `a, []` (cf. `tests/ui/parser/pat-lt-bracket-7.rs`) + // `[]` is excluded as `a[]` isn't an expression and should be recovered as `a, []` (cf. `tests/ui/parser/pat-lt-bracket-7.rs`), + // `as` is included for type casts let has_trailing_operator = matches!(self.token.kind, token::BinOp(op) if op != BinOpToken::Or) || self.token == token::Question || (self.token == token::OpenDelim(Delimiter::Bracket) - && self.look_ahead(1, |t| *t != token::CloseDelim(Delimiter::Bracket))); + && self.look_ahead(1, |t| *t != token::CloseDelim(Delimiter::Bracket))) // excludes `[]` + || self.token.is_keyword(kw::As); - if !has_trailing_method && !has_trailing_operator { + if !has_dot_expr && !has_trailing_operator { // Nothing to recover here. return None; } @@ -391,44 +419,248 @@ impl<'a> Parser<'a> { snapshot.restrictions.insert(Restrictions::IS_PAT); // Parse `?`, `.f`, `(arg0, arg1, ...)` or `[expr]` until they've all been eaten. - if let Ok(expr) = snapshot + let Ok(expr) = snapshot .parse_expr_dot_or_call_with( AttrVec::new(), self.mk_expr(pat_span, ExprKind::Dummy), // equivalent to transforming the parsed pattern into an `Expr` pat_span, ) .map_err(|err| err.cancel()) - { - let non_assoc_span = expr.span; + else { + // We got a trailing method/operator, but that wasn't an expression. + return None; + }; - // Parse an associative expression such as `+ expr`, `% expr`, ... - // Assignements, ranges and `|` are disabled by [`Restrictions::IS_PAT`]. - if let Ok((expr, _)) = - snapshot.parse_expr_assoc_rest_with(0, false, expr).map_err(|err| err.cancel()) - { - // We got a valid expression. - self.restore_snapshot(snapshot); - self.restrictions.remove(Restrictions::IS_PAT); + // Parse an associative expression such as `+ expr`, `% expr`, ... + // Assignments, ranges and `|` are disabled by [`Restrictions::IS_PAT`]. + let Ok((expr, _)) = + snapshot.parse_expr_assoc_rest_with(0, false, expr).map_err(|err| err.cancel()) + else { + // We got a trailing method/operator, but that wasn't an expression. + return None; + }; - let is_bound = is_end_bound - // is_start_bound: either `..` or `)..` - || self.token.is_range_separator() - || self.token == token::CloseDelim(Delimiter::Parenthesis) - && self.look_ahead(1, Token::is_range_separator); + // We got a valid expression. + self.restore_snapshot(snapshot); + self.restrictions.remove(Restrictions::IS_PAT); - // Check that `parse_expr_assoc_with` didn't eat a rhs. - let is_method_call = has_trailing_method && non_assoc_span == expr.span; + let is_bound = is_end_bound + // is_start_bound: either `..` or `)..` + || self.token.is_range_separator() + || self.token == token::CloseDelim(Delimiter::Parenthesis) + && self.look_ahead(1, Token::is_range_separator); - return Some(self.dcx().emit_err(UnexpectedExpressionInPattern { - span: expr.span, + let span = expr.span; + + Some(( + self.dcx() + .create_err(UnexpectedExpressionInPattern { + span, is_bound, - is_method_call, - })); + expr_precedence: expr.precedence().order(), + }) + .stash(span, StashKey::ExprInPat) + .unwrap(), + span, + )) + } + + /// Called by [`Parser::parse_stmt_without_recovery`], used to add statement-aware subdiagnostics to the errors stashed + /// by [`Parser::maybe_recover_trailing_expr`]. + pub(super) fn maybe_augment_stashed_expr_in_pats_with_suggestions(&mut self, stmt: &Stmt) { + if self.dcx().has_errors().is_none() { + // No need to walk the statement if there's no stashed errors. + return; + } + + struct PatVisitor<'a> { + /// `self` + parser: &'a Parser<'a>, + /// The freshly-parsed statement. + stmt: &'a Stmt, + /// The current match arm (for arm guard suggestions). + arm: Option<&'a Arm>, + /// The current struct field (for variable name suggestions). + field: Option<&'a PatField>, + } + + impl<'a> PatVisitor<'a> { + /// Looks for stashed [`StashKey::ExprInPat`] errors in `stash_span`, and emit them with suggestions. + /// `stash_span` is contained in `expr_span`, the latter being larger in borrow patterns; + /// ```txt + /// &mut x.y + /// -----^^^ `stash_span` + /// | + /// `expr_span` + /// ``` + /// `is_range_bound` is used to exclude arm guard suggestions in range pattern bounds. + fn maybe_add_suggestions_then_emit( + &self, + stash_span: Span, + expr_span: Span, + is_range_bound: bool, + ) { + self.parser.dcx().try_steal_modify_and_emit_err( + stash_span, + StashKey::ExprInPat, + |err| { + // Includes pre-pats (e.g. `&mut <err>`) in the diagnostic. + err.span.replace(stash_span, expr_span); + + let sm = self.parser.psess.source_map(); + let stmt = self.stmt; + let line_lo = sm.span_extend_to_line(stmt.span).shrink_to_lo(); + let indentation = sm.indentation_before(stmt.span).unwrap_or_default(); + let Ok(expr) = self.parser.span_to_snippet(expr_span) else { + // FIXME: some suggestions don't actually need the snippet; see PR #123877's unresolved conversations. + return; + }; + + if let StmtKind::Let(local) = &stmt.kind { + match &local.kind { + LocalKind::Decl | LocalKind::Init(_) => { + // It's kinda hard to guess what the user intended, so don't make suggestions. + return; + } + + LocalKind::InitElse(_, _) => {} + } + } + + // help: use an arm guard `if val == expr` + // FIXME(guard_patterns): suggest this regardless of a match arm. + if let Some(arm) = &self.arm + && !is_range_bound + { + let (ident, ident_span) = match self.field { + Some(field) => { + (field.ident.to_string(), field.ident.span.to(expr_span)) + } + None => ("val".to_owned(), expr_span), + }; + + // Are parentheses required around `expr`? + // HACK: a neater way would be preferable. + let expr = match &err.args["expr_precedence"] { + DiagArgValue::Number(expr_precedence) => { + if *expr_precedence + <= ExprPrecedence::Binary(BinOpKind::Eq).order() as i32 + { + format!("({expr})") + } else { + format!("{expr}") + } + } + _ => unreachable!(), + }; + + match &arm.guard { + None => { + err.subdiagnostic( + UnexpectedExpressionInPatternSugg::CreateGuard { + ident_span, + pat_hi: arm.pat.span.shrink_to_hi(), + ident, + expr, + }, + ); + } + Some(guard) => { + // Are parentheses required around the old guard? + let wrap_guard = guard.precedence().order() + <= ExprPrecedence::Binary(BinOpKind::And).order(); + + err.subdiagnostic( + UnexpectedExpressionInPatternSugg::UpdateGuard { + ident_span, + guard_lo: if wrap_guard { + Some(guard.span.shrink_to_lo()) + } else { + None + }, + guard_hi: guard.span.shrink_to_hi(), + guard_hi_paren: if wrap_guard { ")" } else { "" }, + ident, + expr, + }, + ); + } + } + } + + // help: extract the expr into a `const VAL: _ = expr` + let ident = match self.field { + Some(field) => field.ident.as_str().to_uppercase(), + None => "VAL".to_owned(), + }; + err.subdiagnostic(UnexpectedExpressionInPatternSugg::Const { + stmt_lo: line_lo, + ident_span: expr_span, + expr, + ident, + indentation, + }); + + // help: wrap the expr in a `const { expr }` + // FIXME(inline_const_pat): once stabilized, remove this check and remove the `(requires #[feature(inline_const_pat)])` note from the message + if self.parser.psess.unstable_features.is_nightly_build() { + err.subdiagnostic(UnexpectedExpressionInPatternSugg::InlineConst { + start_span: expr_span.shrink_to_lo(), + end_span: expr_span.shrink_to_hi(), + }); + } + }, + ); + } + } + + impl<'a> Visitor<'a> for PatVisitor<'a> { + fn visit_arm(&mut self, a: &'a Arm) -> Self::Result { + self.arm = Some(a); + visit::walk_arm(self, a); + self.arm = None; + } + + fn visit_pat_field(&mut self, fp: &'a PatField) -> Self::Result { + self.field = Some(fp); + visit::walk_pat_field(self, fp); + self.field = None; + } + + fn visit_pat(&mut self, p: &'a Pat) -> Self::Result { + match &p.kind { + // Base expression + PatKind::Err(_) | PatKind::Lit(_) => { + self.maybe_add_suggestions_then_emit(p.span, p.span, false) + } + + // Sub-patterns + // FIXME: this doesn't work with recursive subpats (`&mut &mut <err>`) + PatKind::Box(subpat) | PatKind::Ref(subpat, _) + if matches!(subpat.kind, PatKind::Err(_) | PatKind::Lit(_)) => + { + self.maybe_add_suggestions_then_emit(subpat.span, p.span, false) + } + + // Sub-expressions + PatKind::Range(start, end, _) => { + if let Some(start) = start { + self.maybe_add_suggestions_then_emit(start.span, start.span, true); + } + + if let Some(end) = end { + self.maybe_add_suggestions_then_emit(end.span, end.span, true); + } + } + + // Walk continuation + _ => visit::walk_pat(self, p), + } } } - // We got a trailing method/operator, but we couldn't parse an expression. - None + // Starts the visit. + PatVisitor { parser: self, stmt, arm: None, field: None }.visit_stmt(stmt); } /// Parses a pattern, with a setting whether modern range patterns (e.g., `a..=b`, `a..b` are @@ -444,7 +676,11 @@ impl<'a> Parser<'a> { let mut lo = self.token.span; - if self.token.is_keyword(kw::Let) && self.look_ahead(1, |tok| tok.can_begin_pattern()) { + if self.token.is_keyword(kw::Let) + && self.look_ahead(1, |tok| { + tok.can_begin_pattern(token::NtPatKind::PatParam { inferred: false }) + }) + { self.bump(); self.dcx().emit_err(RemoveLet { span: lo }); lo = self.token.span; @@ -540,11 +776,11 @@ impl<'a> Parser<'a> { self.parse_pat_tuple_struct(qself, path)? } else { match self.maybe_recover_trailing_expr(span, false) { - Some(guar) => PatKind::Err(guar), + Some((guar, _)) => PatKind::Err(guar), None => PatKind::Path(qself, path), } } - } else if let Some(lt) = self.token.lifetime() + } else if let Some((lt, IdentIsRaw::No)) = self.token.lifetime() // In pattern position, we're totally fine with using "next token isn't colon" // as a heuristic. We could probably just always try to recover if it's a lifetime, // because we never have `'a: label {}` in a pattern position anyways, but it does @@ -573,10 +809,10 @@ impl<'a> Parser<'a> { // Try to parse everything else as literal with optional minus match self.parse_literal_maybe_minus() { Ok(begin) => { - let begin = match self.maybe_recover_trailing_expr(begin.span, false) { - Some(guar) => self.mk_expr_err(begin.span, guar), - None => begin, - }; + let begin = self + .maybe_recover_trailing_expr(begin.span, false) + .map(|(guar, sp)| self.mk_expr_err(sp, guar)) + .unwrap_or(begin); match self.parse_range_end() { Some(form) => self.parse_pat_range_begin_with(begin, form)?, @@ -685,7 +921,7 @@ impl<'a> Parser<'a> { /// Parse `&pat` / `&mut pat`. fn parse_pat_deref(&mut self, expected: Option<Expected>) -> PResult<'a, PatKind> { self.expect_and()?; - if let Some(lifetime) = self.token.lifetime() { + if let Some((lifetime, _)) = self.token.lifetime() { self.bump(); // `'a` self.dcx().emit_err(UnexpectedLifetimeInPattern { @@ -717,7 +953,8 @@ impl<'a> Parser<'a> { // For backward compatibility, `(..)` is a tuple pattern as well. let paren_pattern = fields.len() == 1 && !(matches!(trailing_comma, Trailing::Yes) || fields[0].is_rest()); - if paren_pattern { + + let pat = if paren_pattern { let pat = fields.into_iter().next().unwrap(); let close_paren = self.prev_token.span; @@ -735,7 +972,7 @@ impl<'a> Parser<'a> { }, }); - self.parse_pat_range_begin_with(begin.clone(), form) + self.parse_pat_range_begin_with(begin.clone(), form)? } // recover ranges with parentheses around the `(start)..` PatKind::Err(guar) @@ -750,15 +987,20 @@ impl<'a> Parser<'a> { }, }); - self.parse_pat_range_begin_with(self.mk_expr_err(pat.span, *guar), form) + self.parse_pat_range_begin_with(self.mk_expr_err(pat.span, *guar), form)? } // (pat) with optional parentheses - _ => Ok(PatKind::Paren(pat)), + _ => PatKind::Paren(pat), } } else { - Ok(PatKind::Tuple(fields)) - } + PatKind::Tuple(fields) + }; + + Ok(match self.maybe_recover_trailing_expr(open_paren.to(self.prev_token.span), false) { + None => pat, + Some((guar, _)) => PatKind::Err(guar), + }) } /// Parse a mutable binding with the `mut` token already eaten. @@ -812,7 +1054,7 @@ impl<'a> Parser<'a> { self.0 = true; *m = Mutability::Mut; } - walk_pat(self, pat); + mut_visit::walk_pat(self, pat); } } @@ -1011,7 +1253,7 @@ impl<'a> Parser<'a> { } Ok(match recovered { - Some(guar) => self.mk_expr_err(bound.span, guar), + Some((guar, sp)) => self.mk_expr_err(sp, guar), None => bound, }) } @@ -1080,7 +1322,7 @@ impl<'a> Parser<'a> { // but not `ident @ subpat` as `subpat` was already checked and `ident` continues with `@`. let pat = if sub.is_none() - && let Some(guar) = self.maybe_recover_trailing_expr(ident.span, false) + && let Some((guar, _)) = self.maybe_recover_trailing_expr(ident.span, false) { PatKind::Err(guar) } else { @@ -1332,21 +1574,19 @@ impl<'a> Parser<'a> { vec![(first_etc_span, String::new())], Applicability::MachineApplicable, ); - } else { - if let Some(last_non_comma_dotdot_span) = last_non_comma_dotdot_span { - // We have `.., x`. - err.multipart_suggestion( - "move the `..` to the end of the field list", - vec![ - (first_etc_span, String::new()), - ( - self.token.span.to(last_non_comma_dotdot_span.shrink_to_hi()), - format!("{} .. }}", if ate_comma { "" } else { "," }), - ), - ], - Applicability::MachineApplicable, - ); - } + } else if let Some(last_non_comma_dotdot_span) = last_non_comma_dotdot_span { + // We have `.., x`. + err.multipart_suggestion( + "move the `..` to the end of the field list", + vec![ + (first_etc_span, String::new()), + ( + self.token.span.to(last_non_comma_dotdot_span.shrink_to_hi()), + format!("{} .. }}", if ate_comma { "" } else { "," }), + ), + ], + Applicability::MachineApplicable, + ); } } err.emit(); diff --git a/compiler/rustc_parse/src/parser/path.rs b/compiler/rustc_parse/src/parser/path.rs index d8bf10e6021..162ff3b94de 100644 --- a/compiler/rustc_parse/src/parser/path.rs +++ b/compiler/rustc_parse/src/parser/path.rs @@ -9,14 +9,14 @@ use rustc_ast::{ Path, PathSegment, QSelf, }; use rustc_errors::{Applicability, Diag, PResult}; -use rustc_span::symbol::{kw, sym, Ident}; +use rustc_span::symbol::{Ident, kw, sym}; use rustc_span::{BytePos, Span}; use thin_vec::ThinVec; use tracing::debug; use super::ty::{AllowPlus, RecoverQPath, RecoverReturnSign}; use super::{Parser, Restrictions, TokenType}; -use crate::errors::PathSingleColon; +use crate::errors::{PathSingleColon, PathTripleColon}; use crate::parser::{CommaRecoveryMode, RecoverColon, RecoverComma}; use crate::{errors, maybe_whole}; @@ -107,10 +107,11 @@ impl<'a> Parser<'a> { self.parse_path_segments(&mut path.segments, style, None)?; } - Ok(( - qself, - Path { segments: path.segments, span: lo.to(self.prev_token.span), tokens: None }, - )) + Ok((qself, Path { + segments: path.segments, + span: lo.to(self.prev_token.span), + tokens: None, + })) } /// Recover from an invalid single colon, when the user likely meant a qualified path. @@ -210,7 +211,7 @@ impl<'a> Parser<'a> { let lo = self.token.span; let mut segments = ThinVec::new(); let mod_sep_ctxt = self.token.span.ctxt(); - if self.eat(&token::PathSep) { + if self.eat_path_sep() { segments.push(PathSegment::path_root(lo.shrink_to_lo().with_ctxt(mod_sep_ctxt))); } self.parse_path_segments(&mut segments, style, ty_generics)?; @@ -246,7 +247,7 @@ impl<'a> Parser<'a> { } segments.push(segment); - if self.is_import_coupler() || !self.eat(&token::PathSep) { + if self.is_import_coupler() || !self.eat_path_sep() { if style == PathStyle::Expr && self.may_recover() && self.token == token::Colon @@ -272,6 +273,18 @@ impl<'a> Parser<'a> { } } + /// Eat `::` or, potentially, `:::`. + #[must_use] + pub(super) fn eat_path_sep(&mut self) -> bool { + let result = self.eat(&token::PathSep); + if result && self.may_recover() { + if self.eat_noexpect(&token::Colon) { + self.dcx().emit_err(PathTripleColon { span: self.prev_token.span }); + } + } + result + } + pub(super) fn parse_path_segment( &mut self, style: PathStyle, @@ -297,9 +310,7 @@ impl<'a> Parser<'a> { Ok( if style == PathStyle::Type && check_args_start(self) - || style != PathStyle::Mod - && self.check(&token::PathSep) - && self.look_ahead(1, |t| is_args_start(t)) + || style != PathStyle::Mod && self.check_path_sep_and_look_ahead(is_args_start) { // We use `style == PathStyle::Expr` to check if this is in a recursion or not. If // it isn't, then we reset the unmatched angle bracket count as we're about to start @@ -310,7 +321,8 @@ impl<'a> Parser<'a> { // Generic arguments are found - `<`, `(`, `::<` or `::(`. // First, eat `::` if it exists. - let _ = self.eat(&token::PathSep); + let _ = self.eat_path_sep(); + let lo = self.token.span; let args = if self.eat_lt() { // `<'a, T, A = U>` @@ -378,7 +390,10 @@ impl<'a> Parser<'a> { if self.may_recover() && prev_token_before_parsing == token::PathSep && (style == PathStyle::Expr && self.token.can_begin_expr() - || style == PathStyle::Pat && self.token.can_begin_pattern()) + || style == PathStyle::Pat + && self.token.can_begin_pattern(token::NtPatKind::PatParam { + inferred: false, + })) { snapshot = Some(self.create_snapshot_for_diagnostic()); } @@ -473,16 +488,13 @@ impl<'a> Parser<'a> { error.span_suggestion_verbose( prev_token_before_parsing.span, - format!( - "consider removing the `::` here to {}", - match style { - PathStyle::Expr => "call the expression", - PathStyle::Pat => "turn this into a tuple struct pattern", - _ => { - return; - } + format!("consider removing the `::` here to {}", match style { + PathStyle::Expr => "call the expression", + PathStyle::Pat => "turn this into a tuple struct pattern", + _ => { + return; } - ), + }), "", Applicability::MaybeIncorrect, ); @@ -668,12 +680,12 @@ impl<'a> Parser<'a> { err.emit(); continue; } - if !self.token.kind.should_end_const_arg() { - if self.handle_ambiguous_unbraced_const_arg(&mut args)? { - // We've managed to (partially) recover, so continue trying to parse - // arguments. - continue; - } + if !self.token.kind.should_end_const_arg() + && self.handle_ambiguous_unbraced_const_arg(&mut args)? + { + // We've managed to (partially) recover, so continue trying to parse + // arguments. + continue; } break; } diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index 69044192780..b7cdae3e3e1 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -7,13 +7,13 @@ use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, TokenKind}; use rustc_ast::util::classify::{self, TrailingBrace}; use rustc_ast::{ - AttrStyle, AttrVec, Block, BlockCheckMode, Expr, ExprKind, HasAttrs, Local, LocalKind, MacCall, - MacCallStmt, MacStmtStyle, Recovered, Stmt, StmtKind, DUMMY_NODE_ID, + AttrStyle, AttrVec, Block, BlockCheckMode, DUMMY_NODE_ID, Expr, ExprKind, HasAttrs, Local, + LocalKind, MacCall, MacCallStmt, MacStmtStyle, Recovered, Stmt, StmtKind, }; use rustc_errors::{Applicability, Diag, PResult}; -use rustc_span::symbol::{kw, sym, Ident}; +use rustc_span::symbol::{Ident, kw, sym}; use rustc_span::{BytePos, ErrorGuaranteed, Span}; -use thin_vec::{thin_vec, ThinVec}; +use thin_vec::{ThinVec, thin_vec}; use super::attr::InnerAttrForbiddenReason; use super::diagnostics::AttemptLocalParseRecovery; @@ -29,6 +29,9 @@ use crate::{errors, maybe_whole}; impl<'a> Parser<'a> { /// Parses a statement. This stops just before trailing semicolons on everything but items. /// e.g., a `StmtKind::Semi` parses to a `StmtKind::Expr`, leaving the trailing `;` unconsumed. + /// + /// If `force_collect` is [`ForceCollect::Yes`], forces collection of tokens regardless of + /// whether or not we have attributes. // Public for rustfmt usage. pub(super) fn parse_stmt(&mut self, force_collect: ForceCollect) -> PResult<'a, Option<Stmt>> { Ok(self.parse_stmt_without_recovery(false, force_collect).unwrap_or_else(|e| { @@ -66,7 +69,7 @@ impl<'a> Parser<'a> { }); } - Ok(Some(if self.token.is_keyword(kw::Let) { + let stmt = if self.token.is_keyword(kw::Let) { self.collect_tokens(None, attrs, force_collect, |this, attrs| { this.expect_keyword(kw::Let)?; let local = this.parse_local(attrs)?; @@ -163,7 +166,10 @@ impl<'a> Parser<'a> { } else { self.error_outer_attrs(attrs); return Ok(None); - })) + }; + + self.maybe_augment_stashed_expr_in_pats_with_suggestions(&stmt); + Ok(Some(stmt)) } fn parse_stmt_path_start(&mut self, lo: Span, attrs: AttrWrapper) -> PResult<'a, Stmt> { @@ -411,20 +417,14 @@ impl<'a> Parser<'a> { fn check_let_else_init_trailing_brace(&self, init: &ast::Expr) { if let Some(trailing) = classify::expr_trailing_brace(init) { let (span, sugg) = match trailing { - TrailingBrace::MacCall(mac) => ( - mac.span(), - errors::WrapInParentheses::MacroArgs { - left: mac.args.dspan.open, - right: mac.args.dspan.close, - }, - ), - TrailingBrace::Expr(expr) => ( - expr.span, - errors::WrapInParentheses::Expression { - left: expr.span.shrink_to_lo(), - right: expr.span.shrink_to_hi(), - }, - ), + TrailingBrace::MacCall(mac) => (mac.span(), errors::WrapInParentheses::MacroArgs { + left: mac.args.dspan.open, + right: mac.args.dspan.close, + }), + TrailingBrace::Expr(expr) => (expr.span, errors::WrapInParentheses::Expression { + left: expr.span.shrink_to_lo(), + right: expr.span.shrink_to_hi(), + }), }; self.dcx().emit_err(errors::InvalidCurlyInLetElse { span: span.with_lo(span.hi() - BytePos(1)), @@ -459,11 +459,16 @@ impl<'a> Parser<'a> { pub fn parse_block(&mut self) -> PResult<'a, P<Block>> { let (attrs, block) = self.parse_inner_attrs_and_block()?; if let [.., last] = &*attrs { + let suggest_to_outer = match &last.kind { + ast::AttrKind::Normal(attr) => attr.item.is_valid_for_outer_style(), + _ => false, + }; self.error_on_forbidden_inner_attr( last.span, super::attr::InnerAttrPolicy::Forbidden(Some( InnerAttrForbiddenReason::InCodeBlock, )), + suggest_to_outer, ); } Ok(block) diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs index cb8e8d30988..92684505ab0 100644 --- a/compiler/rustc_parse/src/parser/tests.rs +++ b/compiler/rustc_parse/src/parser/tests.rs @@ -9,15 +9,15 @@ use ast::token::IdentIsRaw; use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, Token}; use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree}; -use rustc_ast::{self as ast, visit, PatKind}; +use rustc_ast::{self as ast, PatKind, visit}; use rustc_ast_pretty::pprust::item_to_string; use rustc_data_structures::sync::Lrc; use rustc_errors::emitter::HumanEmitter; use rustc_errors::{DiagCtxt, MultiSpan, PResult}; use rustc_session::parse::ParseSess; use rustc_span::source_map::{FilePathMapping, SourceMap}; -use rustc_span::symbol::{kw, sym, Symbol}; -use rustc_span::{create_default_session_globals_then, BytePos, FileName, Pos, Span}; +use rustc_span::symbol::{Symbol, kw, sym}; +use rustc_span::{BytePos, FileName, Pos, Span, create_default_session_globals_then}; use termcolor::WriteColor; use crate::parser::{ForceCollect, Parser}; diff --git a/compiler/rustc_parse/src/parser/tokenstream/tests.rs b/compiler/rustc_parse/src/parser/tokenstream/tests.rs index d518dfee2b2..efe266f5290 100644 --- a/compiler/rustc_parse/src/parser/tokenstream/tests.rs +++ b/compiler/rustc_parse/src/parser/tokenstream/tests.rs @@ -1,6 +1,6 @@ use rustc_ast::token::{self, IdentIsRaw}; use rustc_ast::tokenstream::{TokenStream, TokenTree}; -use rustc_span::{create_default_session_globals_then, BytePos, Span, Symbol}; +use rustc_span::{BytePos, Span, Symbol, create_default_session_globals_then}; use crate::parser::tests::string_to_stream; diff --git a/compiler/rustc_parse/src/parser/ty.rs b/compiler/rustc_parse/src/parser/ty.rs index 52f0b1c1b04..a8ed8b5df9c 100644 --- a/compiler/rustc_parse/src/parser/ty.rs +++ b/compiler/rustc_parse/src/parser/ty.rs @@ -1,15 +1,15 @@ use rustc_ast::ptr::P; -use rustc_ast::token::{self, BinOpToken, Delimiter, Token, TokenKind}; +use rustc_ast::token::{self, BinOpToken, Delimiter, IdentIsRaw, Token, TokenKind}; use rustc_ast::util::case::Case; use rustc_ast::{ - self as ast, BareFnTy, BoundAsyncness, BoundConstness, BoundPolarity, FnRetTy, GenericBound, - GenericBounds, GenericParam, Generics, Lifetime, MacCall, MutTy, Mutability, PolyTraitRef, - PreciseCapturingArg, TraitBoundModifiers, TraitObjectSyntax, Ty, TyKind, DUMMY_NODE_ID, + self as ast, BareFnTy, BoundAsyncness, BoundConstness, BoundPolarity, DUMMY_NODE_ID, FnRetTy, + GenericBound, GenericBounds, GenericParam, Generics, Lifetime, MacCall, MutTy, Mutability, + PolyTraitRef, PreciseCapturingArg, TraitBoundModifiers, TraitObjectSyntax, Ty, TyKind, }; use rustc_errors::{Applicability, PResult}; -use rustc_span::symbol::{kw, sym, Ident}; +use rustc_span::symbol::{Ident, kw, sym}; use rustc_span::{ErrorGuaranteed, Span, Symbol}; -use thin_vec::{thin_vec, ThinVec}; +use thin_vec::{ThinVec, thin_vec}; use super::{Parser, PathStyle, SeqSep, TokenType, Trailing}; use crate::errors::{ @@ -607,9 +607,6 @@ impl<'a> Parser<'a> { let decl = self.parse_fn_decl(|_| false, AllowPlus::No, recover_return_sign)?; let whole_span = lo.to(self.prev_token.span); if let ast::Const::Yes(span) = constness { - // If we ever start to allow `const fn()`, then update - // feature gating for `#![feature(const_extern_fn)]` to - // cover it. self.dcx().emit_err(FnPointerCannotBeConst { span: whole_span, qualifier: span }); } if let Some(ast::CoroutineKind::Async { span, .. }) = coroutine_kind { @@ -1285,8 +1282,9 @@ impl<'a> Parser<'a> { /// Parses a single lifetime `'a` or panics. pub(super) fn expect_lifetime(&mut self) -> Lifetime { - if let Some(ident) = self.token.lifetime() { - if ident.without_first_quote().is_reserved() + if let Some((ident, is_raw)) = self.token.lifetime() { + if matches!(is_raw, IdentIsRaw::No) + && ident.without_first_quote().is_reserved() && ![kw::UnderscoreLifetime, kw::StaticLifetime].contains(&ident.name) { self.dcx().emit_err(errors::KeywordLifetime { span: ident.span }); diff --git a/compiler/rustc_parse/src/validate_attr.rs b/compiler/rustc_parse/src/validate_attr.rs index fce41bd90be..b15d1edf79c 100644 --- a/compiler/rustc_parse/src/validate_attr.rs +++ b/compiler/rustc_parse/src/validate_attr.rs @@ -7,12 +7,12 @@ use rustc_ast::{ NestedMetaItem, Safety, }; use rustc_errors::{Applicability, FatalError, PResult}; -use rustc_feature::{AttributeSafety, AttributeTemplate, BuiltinAttribute, BUILTIN_ATTRIBUTE_MAP}; +use rustc_feature::{AttributeSafety, AttributeTemplate, BUILTIN_ATTRIBUTE_MAP, BuiltinAttribute}; use rustc_session::errors::report_lit_error; -use rustc_session::lint::builtin::{ILL_FORMED_ATTRIBUTE_INPUT, UNSAFE_ATTR_OUTSIDE_UNSAFE}; use rustc_session::lint::BuiltinLintDiag; +use rustc_session::lint::builtin::{ILL_FORMED_ATTRIBUTE_INPUT, UNSAFE_ATTR_OUTSIDE_UNSAFE}; use rustc_session::parse::ParseSess; -use rustc_span::{sym, BytePos, Span, Symbol}; +use rustc_span::{BytePos, Span, Symbol, sym}; use crate::{errors, parse_in}; @@ -192,13 +192,11 @@ pub fn check_attribute_safety(psess: &ParseSess, safety: AttributeSafety, attr: ); } } - } else { - if let Safety::Unsafe(unsafe_span) = attr_item.unsafety { - psess.dcx().emit_err(errors::InvalidAttrUnsafe { - span: unsafe_span, - name: attr_item.path.clone(), - }); - } + } else if let Safety::Unsafe(unsafe_span) = attr_item.unsafety { + psess.dcx().emit_err(errors::InvalidAttrUnsafe { + span: unsafe_span, + name: attr_item.path.clone(), + }); } } |
