diff options
Diffstat (limited to 'compiler/rustc_parse')
| -rw-r--r-- | compiler/rustc_parse/Cargo.toml | 22 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/mod.rs | 598 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/tokentrees.rs | 313 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/unescape_error_reporting.rs | 223 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lexer/unicode_chars.rs | 392 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/lib.rs | 595 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/attr.rs | 304 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/diagnostics.rs | 1643 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/expr.rs | 2293 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/generics.rs | 292 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/item.rs | 1843 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 1270 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/nonterminal.rs | 170 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/pat.rs | 1012 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/path.rs | 516 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/stmt.rs | 427 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/ty.rs | 631 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/validate_attr.rs | 163 |
18 files changed, 12707 insertions, 0 deletions
diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml new file mode 100644 index 00000000000..52835e5c8a9 --- /dev/null +++ b/compiler/rustc_parse/Cargo.toml @@ -0,0 +1,22 @@ +[package] +authors = ["The Rust Project Developers"] +name = "rustc_parse" +version = "0.0.0" +edition = "2018" + +[lib] +doctest = false + +[dependencies] +bitflags = "1.0" +tracing = "0.1" +rustc_ast_pretty = { path = "../rustc_ast_pretty" } +rustc_data_structures = { path = "../rustc_data_structures" } +rustc_feature = { path = "../rustc_feature" } +rustc_lexer = { path = "../rustc_lexer" } +rustc_errors = { path = "../rustc_errors" } +rustc_session = { path = "../rustc_session" } +rustc_span = { path = "../rustc_span" } +rustc_ast = { path = "../rustc_ast" } +unicode-normalization = "0.1.11" +smallvec = { version = "1.0", features = ["union", "may_dangle"] } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs new file mode 100644 index 00000000000..a65d3446819 --- /dev/null +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -0,0 +1,598 @@ +use rustc_ast::ast::AttrStyle; +use rustc_ast::token::{self, CommentKind, Token, TokenKind}; +use rustc_data_structures::sync::Lrc; +use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError}; +use rustc_lexer::Base; +use rustc_lexer::{unescape, RawStrError}; +use rustc_session::parse::ParseSess; +use rustc_span::symbol::{sym, Symbol}; +use rustc_span::{BytePos, Pos, Span}; + +use std::char; +use tracing::debug; + +mod tokentrees; +mod unescape_error_reporting; +mod unicode_chars; + +use rustc_lexer::{unescape::Mode, DocStyle}; +use unescape_error_reporting::{emit_unescape_error, push_escaped_char}; + +#[derive(Clone, Debug)] +pub struct UnmatchedBrace { + pub expected_delim: token::DelimToken, + pub found_delim: Option<token::DelimToken>, + pub found_span: Span, + pub unclosed_span: Option<Span>, + pub candidate_span: Option<Span>, +} + +pub struct StringReader<'a> { + sess: 
&'a ParseSess, + /// Initial position, read-only. + start_pos: BytePos, + /// The absolute offset within the source_map of the current character. + pos: BytePos, + /// Stop reading src at this index. + end_src_index: usize, + /// Source text to tokenize. + src: Lrc<String>, + override_span: Option<Span>, +} + +impl<'a> StringReader<'a> { + pub fn new( + sess: &'a ParseSess, + source_file: Lrc<rustc_span::SourceFile>, + override_span: Option<Span>, + ) -> Self { + let src = source_file.src.clone().unwrap_or_else(|| { + sess.span_diagnostic + .bug(&format!("cannot lex `source_file` without source: {}", source_file.name)); + }); + + StringReader { + sess, + start_pos: source_file.start_pos, + pos: source_file.start_pos, + end_src_index: src.len(), + src, + override_span, + } + } + + fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span { + self.override_span.unwrap_or_else(|| Span::with_root_ctxt(lo, hi)) + } + + /// Returns the next token, including trivia like whitespace or comments. + pub fn next_token(&mut self) -> Token { + let start_src_index = self.src_index(self.pos); + let text: &str = &self.src[start_src_index..self.end_src_index]; + + if text.is_empty() { + let span = self.mk_sp(self.pos, self.pos); + return Token::new(token::Eof, span); + } + + { + let is_beginning_of_file = self.pos == self.start_pos; + if is_beginning_of_file { + if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { + let start = self.pos; + self.pos = self.pos + BytePos::from_usize(shebang_len); + + let sym = self.symbol_from(start + BytePos::from_usize("#!".len())); + let kind = token::Shebang(sym); + + let span = self.mk_sp(start, self.pos); + return Token::new(kind, span); + } + } + } + + let token = rustc_lexer::first_token(text); + + let start = self.pos; + self.pos = self.pos + BytePos::from_usize(token.len); + + debug!("try_next_token: {:?}({:?})", token.kind, self.str_from(start)); + + let kind = self.cook_lexer_token(token.kind, start); + let span = self.mk_sp(start, 
self.pos); + Token::new(kind, span) + } + + /// Report a fatal lexical error with a given span. + fn fatal_span(&self, sp: Span, m: &str) -> FatalError { + self.sess.span_diagnostic.span_fatal(sp, m) + } + + /// Report a lexical error with a given span. + fn err_span(&self, sp: Span, m: &str) { + self.sess.span_diagnostic.struct_span_err(sp, m).emit(); + } + + /// Report a fatal error spanning [`from_pos`, `to_pos`). + fn fatal_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> FatalError { + self.fatal_span(self.mk_sp(from_pos, to_pos), m) + } + + /// Report a lexical error spanning [`from_pos`, `to_pos`). + fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) { + self.err_span(self.mk_sp(from_pos, to_pos), m) + } + + fn struct_fatal_span_char( + &self, + from_pos: BytePos, + to_pos: BytePos, + m: &str, + c: char, + ) -> DiagnosticBuilder<'a> { + let mut m = m.to_string(); + m.push_str(": "); + push_escaped_char(&mut m, c); + + self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..]) + } + + /// Turns simple `rustc_lexer::TokenKind` enum into a rich + /// `librustc_ast::TokenKind`. This turns strings into interned + /// symbols and runs additional validation. + fn cook_lexer_token(&self, token: rustc_lexer::TokenKind, start: BytePos) -> TokenKind { + match token { + rustc_lexer::TokenKind::LineComment { doc_style } => { + match doc_style { + Some(doc_style) => { + // Opening delimiter of the length 3 is not included into the symbol. 
+ let content_start = start + BytePos(3); + let content = self.str_from(content_start); + + self.cook_doc_comment(content_start, content, CommentKind::Line, doc_style) + } + None => token::Comment, + } + } + rustc_lexer::TokenKind::BlockComment { doc_style, terminated } => { + if !terminated { + let msg = match doc_style { + Some(_) => "unterminated block doc-comment", + None => "unterminated block comment", + }; + let last_bpos = self.pos; + self.sess + .span_diagnostic + .struct_span_fatal_with_code( + self.mk_sp(start, last_bpos), + msg, + error_code!(E0758), + ) + .emit(); + FatalError.raise(); + } + match doc_style { + Some(doc_style) => { + // Opening delimiter of the length 3 and closing delimiter of the length 2 + // are not included into the symbol. + let content_start = start + BytePos(3); + let content_end = self.pos - BytePos(if terminated { 2 } else { 0 }); + let content = self.str_from_to(content_start, content_end); + + self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) + } + None => token::Comment, + } + } + rustc_lexer::TokenKind::Whitespace => token::Whitespace, + rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => { + let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent; + let mut ident_start = start; + if is_raw_ident { + ident_start = ident_start + BytePos(2); + } + let sym = nfc_normalize(self.str_from(ident_start)); + let span = self.mk_sp(start, self.pos); + self.sess.symbol_gallery.insert(sym, span); + if is_raw_ident { + if !sym.can_be_raw() { + self.err_span(span, &format!("`{}` cannot be a raw identifier", sym)); + } + self.sess.raw_identifier_spans.borrow_mut().push(span); + } + token::Ident(sym, is_raw_ident) + } + rustc_lexer::TokenKind::Literal { kind, suffix_start } => { + let suffix_start = start + BytePos(suffix_start as u32); + let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); + let suffix = if suffix_start < self.pos { + let string = 
self.str_from(suffix_start); + if string == "_" { + self.sess + .span_diagnostic + .struct_span_warn( + self.mk_sp(suffix_start, self.pos), + "underscore literal suffix is not allowed", + ) + .warn( + "this was previously accepted by the compiler but is \ + being phased out; it will become a hard error in \ + a future release!", + ) + .note( + "see issue #42326 \ + <https://github.com/rust-lang/rust/issues/42326> \ + for more information", + ) + .emit(); + None + } else { + Some(Symbol::intern(string)) + } + } else { + None + }; + token::Literal(token::Lit { kind, symbol, suffix }) + } + rustc_lexer::TokenKind::Lifetime { starts_with_number } => { + // Include the leading `'` in the real identifier, for macro + // expansion purposes. See #12512 for the gory details of why + // this is necessary. + let lifetime_name = self.str_from(start); + if starts_with_number { + self.err_span_(start, self.pos, "lifetimes cannot start with a number"); + } + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident) + } + rustc_lexer::TokenKind::Semi => token::Semi, + rustc_lexer::TokenKind::Comma => token::Comma, + rustc_lexer::TokenKind::Dot => token::Dot, + rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren), + rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren), + rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(token::Brace), + rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(token::Brace), + rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(token::Bracket), + rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(token::Bracket), + rustc_lexer::TokenKind::At => token::At, + rustc_lexer::TokenKind::Pound => token::Pound, + rustc_lexer::TokenKind::Tilde => token::Tilde, + rustc_lexer::TokenKind::Question => token::Question, + rustc_lexer::TokenKind::Colon => token::Colon, + rustc_lexer::TokenKind::Dollar => token::Dollar, + rustc_lexer::TokenKind::Eq => token::Eq, + rustc_lexer::TokenKind::Bang => token::Not, + 
rustc_lexer::TokenKind::Lt => token::Lt, + rustc_lexer::TokenKind::Gt => token::Gt, + rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus), + rustc_lexer::TokenKind::And => token::BinOp(token::And), + rustc_lexer::TokenKind::Or => token::BinOp(token::Or), + rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus), + rustc_lexer::TokenKind::Star => token::BinOp(token::Star), + rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash), + rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret), + rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent), + + rustc_lexer::TokenKind::Unknown => { + let c = self.str_from(start).chars().next().unwrap(); + let mut err = + self.struct_fatal_span_char(start, self.pos, "unknown start of token", c); + // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, + // instead of keeping a table in `check_for_substitution`into the token. Ideally, + // this should be inside `rustc_lexer`. However, we should first remove compound + // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it, + // as there will be less overall work to do this way. 
+ let token = unicode_chars::check_for_substitution(self, start, c, &mut err) + .unwrap_or_else(|| token::Unknown(self.symbol_from(start))); + err.emit(); + token + } + } + } + + fn cook_doc_comment( + &self, + content_start: BytePos, + content: &str, + comment_kind: CommentKind, + doc_style: DocStyle, + ) -> TokenKind { + if content.contains('\r') { + for (idx, _) in content.char_indices().filter(|&(_, c)| c == '\r') { + self.err_span_( + content_start + BytePos(idx as u32), + content_start + BytePos(idx as u32 + 1), + match comment_kind { + CommentKind::Line => "bare CR not allowed in doc-comment", + CommentKind::Block => "bare CR not allowed in block doc-comment", + }, + ); + } + } + + let attr_style = match doc_style { + DocStyle::Outer => AttrStyle::Outer, + DocStyle::Inner => AttrStyle::Inner, + }; + + token::DocComment(comment_kind, attr_style, Symbol::intern(content)) + } + + fn cook_lexer_literal( + &self, + start: BytePos, + suffix_start: BytePos, + kind: rustc_lexer::LiteralKind, + ) -> (token::LitKind, Symbol) { + // prefix means `"` or `br"` or `r###"`, ... 
+ let (lit_kind, mode, prefix_len, postfix_len) = match kind { + rustc_lexer::LiteralKind::Char { terminated } => { + if !terminated { + self.sess + .span_diagnostic + .struct_span_fatal_with_code( + self.mk_sp(start, suffix_start), + "unterminated character literal", + error_code!(E0762), + ) + .emit(); + FatalError.raise(); + } + (token::Char, Mode::Char, 1, 1) // ' ' + } + rustc_lexer::LiteralKind::Byte { terminated } => { + if !terminated { + self.sess + .span_diagnostic + .struct_span_fatal_with_code( + self.mk_sp(start + BytePos(1), suffix_start), + "unterminated byte constant", + error_code!(E0763), + ) + .emit(); + FatalError.raise(); + } + (token::Byte, Mode::Byte, 2, 1) // b' ' + } + rustc_lexer::LiteralKind::Str { terminated } => { + if !terminated { + self.sess + .span_diagnostic + .struct_span_fatal_with_code( + self.mk_sp(start, suffix_start), + "unterminated double quote string", + error_code!(E0765), + ) + .emit(); + FatalError.raise(); + } + (token::Str, Mode::Str, 1, 1) // " " + } + rustc_lexer::LiteralKind::ByteStr { terminated } => { + if !terminated { + self.sess + .span_diagnostic + .struct_span_fatal_with_code( + self.mk_sp(start + BytePos(1), suffix_start), + "unterminated double quote byte string", + error_code!(E0766), + ) + .emit(); + FatalError.raise(); + } + (token::ByteStr, Mode::ByteStr, 2, 1) // b" " + } + rustc_lexer::LiteralKind::RawStr { n_hashes, err } => { + self.report_raw_str_error(start, err); + let n = u32::from(n_hashes); + (token::StrRaw(n_hashes), Mode::RawStr, 2 + n, 1 + n) // r##" "## + } + rustc_lexer::LiteralKind::RawByteStr { n_hashes, err } => { + self.report_raw_str_error(start, err); + let n = u32::from(n_hashes); + (token::ByteStrRaw(n_hashes), Mode::RawByteStr, 3 + n, 1 + n) // br##" "## + } + rustc_lexer::LiteralKind::Int { base, empty_int } => { + return if empty_int { + self.sess + .span_diagnostic + .struct_span_err_with_code( + self.mk_sp(start, suffix_start), + "no valid digits found for number", + 
error_code!(E0768), + ) + .emit(); + (token::Integer, sym::integer(0)) + } else { + self.validate_int_literal(base, start, suffix_start); + (token::Integer, self.symbol_from_to(start, suffix_start)) + }; + } + rustc_lexer::LiteralKind::Float { base, empty_exponent } => { + if empty_exponent { + self.err_span_(start, self.pos, "expected at least one digit in exponent"); + } + + match base { + Base::Hexadecimal => self.err_span_( + start, + suffix_start, + "hexadecimal float literal is not supported", + ), + Base::Octal => { + self.err_span_(start, suffix_start, "octal float literal is not supported") + } + Base::Binary => { + self.err_span_(start, suffix_start, "binary float literal is not supported") + } + _ => (), + } + + let id = self.symbol_from_to(start, suffix_start); + return (token::Float, id); + } + }; + let content_start = start + BytePos(prefix_len); + let content_end = suffix_start - BytePos(postfix_len); + let id = self.symbol_from_to(content_start, content_end); + self.validate_literal_escape(mode, content_start, content_end); + (lit_kind, id) + } + + pub fn pos(&self) -> BytePos { + self.pos + } + + #[inline] + fn src_index(&self, pos: BytePos) -> usize { + (pos - self.start_pos).to_usize() + } + + /// Slice of the source text from `start` up to but excluding `self.pos`, + /// meaning the slice does not include the character `self.ch`. + fn str_from(&self, start: BytePos) -> &str { + self.str_from_to(start, self.pos) + } + + /// Creates a Symbol from a given offset to the current offset. + fn symbol_from(&self, start: BytePos) -> Symbol { + debug!("taking an ident from {:?} to {:?}", start, self.pos); + Symbol::intern(self.str_from(start)) + } + + /// As symbol_from, with an explicit endpoint. + fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol { + debug!("taking an ident from {:?} to {:?}", start, end); + Symbol::intern(self.str_from_to(start, end)) + } + + /// Slice of the source text spanning from `start` up to but excluding `end`. 
+ fn str_from_to(&self, start: BytePos, end: BytePos) -> &str { + &self.src[self.src_index(start)..self.src_index(end)] + } + + fn report_raw_str_error(&self, start: BytePos, opt_err: Option<RawStrError>) { + match opt_err { + Some(RawStrError::InvalidStarter { bad_char }) => { + self.report_non_started_raw_string(start, bad_char) + } + Some(RawStrError::NoTerminator { expected, found, possible_terminator_offset }) => self + .report_unterminated_raw_string(start, expected, possible_terminator_offset, found), + Some(RawStrError::TooManyDelimiters { found }) => { + self.report_too_many_hashes(start, found) + } + None => (), + } + } + + fn report_non_started_raw_string(&self, start: BytePos, bad_char: char) -> ! { + self.struct_fatal_span_char( + start, + self.pos, + "found invalid character; only `#` is allowed in raw string delimitation", + bad_char, + ) + .emit(); + FatalError.raise() + } + + fn report_unterminated_raw_string( + &self, + start: BytePos, + n_hashes: usize, + possible_offset: Option<usize>, + found_terminators: usize, + ) -> ! { + let mut err = self.sess.span_diagnostic.struct_span_fatal_with_code( + self.mk_sp(start, start), + "unterminated raw string", + error_code!(E0748), + ); + + err.span_label(self.mk_sp(start, start), "unterminated raw string"); + + if n_hashes > 0 { + err.note(&format!( + "this raw string should be terminated with `\"{}`", + "#".repeat(n_hashes) + )); + } + + if let Some(possible_offset) = possible_offset { + let lo = start + BytePos(possible_offset as u32); + let hi = lo + BytePos(found_terminators as u32); + let span = self.mk_sp(lo, hi); + err.span_suggestion( + span, + "consider terminating the string here", + "#".repeat(n_hashes), + Applicability::MaybeIncorrect, + ); + } + + err.emit(); + FatalError.raise() + } + + /// Note: It was decided to not add a test case, because it would be to big. 
+ /// https://github.com/rust-lang/rust/pull/50296#issuecomment-392135180 + fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! { + self.fatal_span_( + start, + self.pos, + &format!( + "too many `#` symbols: raw strings may be delimited \ + by up to 65535 `#` symbols, but found {}", + found + ), + ) + .raise(); + } + + fn validate_literal_escape(&self, mode: Mode, content_start: BytePos, content_end: BytePos) { + let lit_content = self.str_from_to(content_start, content_end); + unescape::unescape_literal(lit_content, mode, &mut |range, result| { + // Here we only check for errors. The actual unescaping is done later. + if let Err(err) = result { + let span_with_quotes = + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)); + emit_unescape_error( + &self.sess.span_diagnostic, + lit_content, + span_with_quotes, + mode, + range, + err, + ); + } + }); + } + + fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { + let base = match base { + Base::Binary => 2, + Base::Octal => 8, + _ => return, + }; + let s = self.str_from_to(content_start + BytePos(2), content_end); + for (idx, c) in s.char_indices() { + let idx = idx as u32; + if c != '_' && c.to_digit(base).is_none() { + let lo = content_start + BytePos(2 + idx); + let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32); + self.err_span_(lo, hi, &format!("invalid digit for a base {} literal", base)); + } + } + } +} + +pub fn nfc_normalize(string: &str) -> Symbol { + use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization}; + match is_nfc_quick(string.chars()) { + IsNormalized::Yes => Symbol::intern(string), + _ => { + let normalized_str: String = string.chars().nfc().collect(); + Symbol::intern(&normalized_str) + } + } +} diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs new file mode 100644 index 00000000000..c08659ec9f6 --- /dev/null +++ 
b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -0,0 +1,313 @@ +use super::{StringReader, UnmatchedBrace}; + +use rustc_ast::token::{self, DelimToken, Token}; +use rustc_ast::tokenstream::{ + DelimSpan, + IsJoint::{self, *}, + TokenStream, TokenTree, TreeAndJoint, +}; +use rustc_ast_pretty::pprust::token_to_string; +use rustc_data_structures::fx::FxHashMap; +use rustc_errors::PResult; +use rustc_span::Span; + +impl<'a> StringReader<'a> { + crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) { + let mut tt_reader = TokenTreesReader { + string_reader: self, + token: Token::dummy(), + joint_to_prev: Joint, + open_braces: Vec::new(), + unmatched_braces: Vec::new(), + matching_delim_spans: Vec::new(), + last_unclosed_found_span: None, + last_delim_empty_block_spans: FxHashMap::default(), + matching_block_spans: Vec::new(), + }; + let res = tt_reader.parse_all_token_trees(); + (res, tt_reader.unmatched_braces) + } +} + +struct TokenTreesReader<'a> { + string_reader: StringReader<'a>, + token: Token, + joint_to_prev: IsJoint, + /// Stack of open delimiters and their spans. Used for error message. + open_braces: Vec<(token::DelimToken, Span)>, + unmatched_braces: Vec<UnmatchedBrace>, + /// The type and spans for all braces + /// + /// Used only for error recovery when arriving to EOF with mismatched braces. + matching_delim_spans: Vec<(token::DelimToken, Span, Span)>, + last_unclosed_found_span: Option<Span>, + /// Collect empty block spans that might have been auto-inserted by editors. + last_delim_empty_block_spans: FxHashMap<token::DelimToken, Span>, + /// Collect the spans of braces (Open, Close). Used only + /// for detecting if blocks are empty and only braces. + matching_block_spans: Vec<(Span, Span)>, +} + +impl<'a> TokenTreesReader<'a> { + // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`. 
+ fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> { + let mut buf = TokenStreamBuilder::default(); + + self.real_token(); + while self.token != token::Eof { + buf.push(self.parse_token_tree()?); + } + + Ok(buf.into_token_stream()) + } + + // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`. + fn parse_token_trees_until_close_delim(&mut self) -> TokenStream { + let mut buf = TokenStreamBuilder::default(); + loop { + if let token::CloseDelim(..) = self.token.kind { + return buf.into_token_stream(); + } + + match self.parse_token_tree() { + Ok(tree) => buf.push(tree), + Err(mut e) => { + e.emit(); + return buf.into_token_stream(); + } + } + } + } + + fn parse_token_tree(&mut self) -> PResult<'a, TreeAndJoint> { + let sm = self.string_reader.sess.source_map(); + + match self.token.kind { + token::Eof => { + let msg = "this file contains an unclosed delimiter"; + let mut err = + self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, msg); + for &(_, sp) in &self.open_braces { + err.span_label(sp, "unclosed delimiter"); + self.unmatched_braces.push(UnmatchedBrace { + expected_delim: token::DelimToken::Brace, + found_delim: None, + found_span: self.token.span, + unclosed_span: Some(sp), + candidate_span: None, + }); + } + + if let Some((delim, _)) = self.open_braces.last() { + if let Some((_, open_sp, close_sp)) = + self.matching_delim_spans.iter().find(|(d, open_sp, close_sp)| { + if let Some(close_padding) = sm.span_to_margin(*close_sp) { + if let Some(open_padding) = sm.span_to_margin(*open_sp) { + return delim == d && close_padding != open_padding; + } + } + false + }) + // these are in reverse order as they get inserted on close, but + { + // we want the last open/first close + err.span_label(*open_sp, "this delimiter might not be properly closed..."); + err.span_label( + *close_sp, + "...as it matches this but it has different indentation", + ); + } + } + Err(err) + } + token::OpenDelim(delim) => { + // The 
span for beginning of the delimited section + let pre_span = self.token.span; + + // Parse the open delimiter. + self.open_braces.push((delim, self.token.span)); + self.real_token(); + + // Parse the token trees within the delimiters. + // We stop at any delimiter so we can try to recover if the user + // uses an incorrect delimiter. + let tts = self.parse_token_trees_until_close_delim(); + + // Expand to cover the entire delimited token tree + let delim_span = DelimSpan::from_pair(pre_span, self.token.span); + + match self.token.kind { + // Correct delimiter. + token::CloseDelim(d) if d == delim => { + let (open_brace, open_brace_span) = self.open_braces.pop().unwrap(); + let close_brace_span = self.token.span; + + if tts.is_empty() { + let empty_block_span = open_brace_span.to(close_brace_span); + if !sm.is_multiline(empty_block_span) { + // Only track if the block is in the form of `{}`, otherwise it is + // likely that it was written on purpose. + self.last_delim_empty_block_spans.insert(delim, empty_block_span); + } + } + + match (open_brace, delim) { + //only add braces + (DelimToken::Brace, DelimToken::Brace) => { + self.matching_block_spans.push((open_brace_span, close_brace_span)); + } + _ => {} + } + + if self.open_braces.is_empty() { + // Clear up these spans to avoid suggesting them as we've found + // properly matched delimiters so far for an entire block. + self.matching_delim_spans.clear(); + } else { + self.matching_delim_spans.push(( + open_brace, + open_brace_span, + close_brace_span, + )); + } + // Parse the closing delimiter. + self.real_token(); + } + // Incorrect delimiter. + token::CloseDelim(other) => { + let mut unclosed_delimiter = None; + let mut candidate = None; + + if self.last_unclosed_found_span != Some(self.token.span) { + // do not complain about the same unclosed delimiter multiple times + self.last_unclosed_found_span = Some(self.token.span); + // This is a conservative error: only report the last unclosed + // delimiter. 
The previous unclosed delimiters could actually be + // closed! The parser just hasn't gotten to them yet. + if let Some(&(_, sp)) = self.open_braces.last() { + unclosed_delimiter = Some(sp); + }; + if let Some(current_padding) = sm.span_to_margin(self.token.span) { + for (brace, brace_span) in &self.open_braces { + if let Some(padding) = sm.span_to_margin(*brace_span) { + // high likelihood of these two corresponding + if current_padding == padding && brace == &other { + candidate = Some(*brace_span); + } + } + } + } + let (tok, _) = self.open_braces.pop().unwrap(); + self.unmatched_braces.push(UnmatchedBrace { + expected_delim: tok, + found_delim: Some(other), + found_span: self.token.span, + unclosed_span: unclosed_delimiter, + candidate_span: candidate, + }); + } else { + self.open_braces.pop(); + } + + // If the incorrect delimiter matches an earlier opening + // delimiter, then don't consume it (it can be used to + // close the earlier one). Otherwise, consume it. + // E.g., we try to recover from: + // fn foo() { + // bar(baz( + // } // Incorrect delimiter but matches the earlier `{` + if !self.open_braces.iter().any(|&(b, _)| b == other) { + self.real_token(); + } + } + token::Eof => { + // Silently recover, the EOF token will be seen again + // and an error emitted then. Thus we don't pop from + // self.open_braces here. + } + _ => {} + } + + Ok(TokenTree::Delimited(delim_span, delim, tts).into()) + } + token::CloseDelim(delim) => { + // An unexpected closing delimiter (i.e., there is no + // matching opening delimiter). 
+ let token_str = token_to_string(&self.token); + let msg = format!("unexpected closing delimiter: `{}`", token_str); + let mut err = + self.string_reader.sess.span_diagnostic.struct_span_err(self.token.span, &msg); + + // Braces are added at the end, so the last element is the biggest block + if let Some(parent) = self.matching_block_spans.last() { + if let Some(span) = self.last_delim_empty_block_spans.remove(&delim) { + // Check if the (empty block) is in the last properly closed block + if (parent.0.to(parent.1)).contains(span) { + err.span_label( + span, + "block is empty, you might have not meant to close it", + ); + } else { + err.span_label(parent.0, "this opening brace..."); + + err.span_label(parent.1, "...matches this closing brace"); + } + } else { + err.span_label(parent.0, "this opening brace..."); + + err.span_label(parent.1, "...matches this closing brace"); + } + } + + err.span_label(self.token.span, "unexpected closing delimiter"); + Err(err) + } + _ => { + let tt = TokenTree::Token(self.token.take()); + self.real_token(); + let is_joint = self.joint_to_prev == Joint && self.token.is_op(); + Ok((tt, if is_joint { Joint } else { NonJoint })) + } + } + } + + fn real_token(&mut self) { + self.joint_to_prev = Joint; + loop { + let token = self.string_reader.next_token(); + match token.kind { + token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => { + self.joint_to_prev = NonJoint; + } + _ => { + self.token = token; + return; + } + } + } + } +} + +#[derive(Default)] +struct TokenStreamBuilder { + buf: Vec<TreeAndJoint>, +} + +impl TokenStreamBuilder { + fn push(&mut self, (tree, joint): TreeAndJoint) { + if let Some((TokenTree::Token(prev_token), Joint)) = self.buf.last() { + if let TokenTree::Token(token) = &tree { + if let Some(glued) = prev_token.glue(token) { + self.buf.pop(); + self.buf.push((TokenTree::Token(glued), joint)); + return; + } + } + } + self.buf.push((tree, joint)) + } + + fn into_token_stream(self) -> 
TokenStream { + TokenStream::new(self.buf) + } +} diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs new file mode 100644 index 00000000000..6f249f491a6 --- /dev/null +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -0,0 +1,223 @@ +//! Utilities for rendering escape sequence errors as diagnostics. + +use std::iter::once; +use std::ops::Range; + +use rustc_errors::{Applicability, Handler}; +use rustc_lexer::unescape::{EscapeError, Mode}; +use rustc_span::{BytePos, Span}; + +pub(crate) fn emit_unescape_error( + handler: &Handler, + // interior part of the literal, without quotes + lit: &str, + // full span of the literal, including quotes + span_with_quotes: Span, + mode: Mode, + // range of the error inside `lit` + range: Range<usize>, + error: EscapeError, +) { + tracing::debug!( + "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", + lit, + span_with_quotes, + mode, + range, + error + ); + let span = { + let Range { start, end } = range; + let (start, end) = (start as u32, end as u32); + let lo = span_with_quotes.lo() + BytePos(start + 1); + let hi = lo + BytePos(end - start); + span_with_quotes.with_lo(lo).with_hi(hi) + }; + let last_char = || { + let c = lit[range.clone()].chars().rev().next().unwrap(); + let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32)); + (c, span) + }; + match error { + EscapeError::LoneSurrogateUnicodeEscape => { + handler + .struct_span_err(span, "invalid unicode character escape") + .help("unicode escape must not be a surrogate") + .emit(); + } + EscapeError::OutOfRangeUnicodeEscape => { + handler + .struct_span_err(span, "invalid unicode character escape") + .help("unicode escape must be at most 10FFFF") + .emit(); + } + EscapeError::MoreThanOneChar => { + let msg = if mode.is_bytes() { + "if you meant to write a byte string literal, use double quotes" + } else { + "if you meant to write a `str` literal, use double quotes" 
+ }; + + handler + .struct_span_err( + span_with_quotes, + "character literal may only contain one codepoint", + ) + .span_suggestion( + span_with_quotes, + msg, + format!("\"{}\"", lit), + Applicability::MachineApplicable, + ) + .emit(); + } + EscapeError::EscapeOnlyChar => { + let (c, _span) = last_char(); + + let mut msg = if mode.is_bytes() { + "byte constant must be escaped: " + } else { + "character constant must be escaped: " + } + .to_string(); + push_escaped_char(&mut msg, c); + + handler.span_err(span, msg.as_str()) + } + EscapeError::BareCarriageReturn => { + let msg = if mode.in_double_quotes() { + "bare CR not allowed in string, use \\r instead" + } else { + "character constant must be escaped: \\r" + }; + handler.span_err(span, msg); + } + EscapeError::BareCarriageReturnInRawString => { + assert!(mode.in_double_quotes()); + let msg = "bare CR not allowed in raw string"; + handler.span_err(span, msg); + } + EscapeError::InvalidEscape => { + let (c, span) = last_char(); + + let label = + if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" }; + let mut msg = label.to_string(); + msg.push_str(": "); + push_escaped_char(&mut msg, c); + + let mut diag = handler.struct_span_err(span, msg.as_str()); + diag.span_label(span, label); + if c == '{' || c == '}' && !mode.is_bytes() { + diag.help( + "if used in a formatting string, \ + curly braces are escaped with `{{` and `}}`", + ); + } else if c == '\r' { + diag.help( + "this is an isolated carriage return; \ + consider checking your editor and version control settings", + ); + } + diag.emit(); + } + EscapeError::TooShortHexEscape => { + handler.span_err(span, "numeric character escape is too short") + } + EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => { + let (c, span) = last_char(); + + let mut msg = if error == EscapeError::InvalidCharInHexEscape { + "invalid character in numeric character escape: " + } else { + "invalid character in unicode escape: 
" + } + .to_string(); + push_escaped_char(&mut msg, c); + + handler.span_err(span, msg.as_str()) + } + EscapeError::NonAsciiCharInByte => { + assert!(mode.is_bytes()); + let (_c, span) = last_char(); + handler.span_err( + span, + "byte constant must be ASCII. \ + Use a \\xHH escape for a non-ASCII byte", + ) + } + EscapeError::NonAsciiCharInByteString => { + assert!(mode.is_bytes()); + let (_c, span) = last_char(); + handler.span_err(span, "raw byte string must be ASCII") + } + EscapeError::OutOfRangeHexEscape => handler.span_err( + span, + "this form of character escape may only be used \ + with characters in the range [\\x00-\\x7f]", + ), + EscapeError::LeadingUnderscoreUnicodeEscape => { + let (_c, span) = last_char(); + handler.span_err(span, "invalid start of unicode escape") + } + EscapeError::OverlongUnicodeEscape => { + handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)") + } + EscapeError::UnclosedUnicodeEscape => { + handler.span_err(span, "unterminated unicode escape (needed a `}`)") + } + EscapeError::NoBraceInUnicodeEscape => { + let msg = "incorrect unicode escape sequence"; + let mut diag = handler.struct_span_err(span, msg); + + let mut suggestion = "\\u{".to_owned(); + let mut suggestion_len = 0; + let (c, char_span) = last_char(); + let chars = once(c).chain(lit[range.end..].chars()); + for c in chars.take(6).take_while(|c| c.is_digit(16)) { + suggestion.push(c); + suggestion_len += c.len_utf8(); + } + + if suggestion_len > 0 { + suggestion.push('}'); + let lo = char_span.lo(); + let hi = lo + BytePos(suggestion_len as u32); + diag.span_suggestion( + span.with_lo(lo).with_hi(hi), + "format of unicode escape sequences uses braces", + suggestion, + Applicability::MaybeIncorrect, + ); + } else { + diag.span_label(span, msg); + diag.help("format of unicode escape sequences is `\\u{...}`"); + } + + diag.emit(); + } + EscapeError::UnicodeEscapeInByte => handler.span_err( + span, + "unicode escape sequences cannot be used \ 
+ as a byte or in a byte string", + ), + EscapeError::EmptyUnicodeEscape => { + handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)") + } + EscapeError::ZeroChars => handler.span_err(span, "empty character literal"), + EscapeError::LoneSlash => handler.span_err(span, "invalid trailing slash in literal"), + } +} + +/// Pushes a character to a message string for error reporting +pub(crate) fn push_escaped_char(msg: &mut String, c: char) { + match c { + '\u{20}'..='\u{7e}' => { + // Don't escape \, ' or " for user-facing messages + msg.push(c); + } + _ => { + msg.extend(c.escape_default()); + } + } +} diff --git a/compiler/rustc_parse/src/lexer/unicode_chars.rs b/compiler/rustc_parse/src/lexer/unicode_chars.rs new file mode 100644 index 00000000000..ac395f6cbc2 --- /dev/null +++ b/compiler/rustc_parse/src/lexer/unicode_chars.rs @@ -0,0 +1,392 @@ +// Characters and their corresponding confusables were collected from +// http://www.unicode.org/Public/security/10.0.0/confusables.txt + +use super::StringReader; +use crate::token; +use rustc_errors::{Applicability, DiagnosticBuilder}; +use rustc_span::{symbol::kw, BytePos, Pos, Span}; + +#[rustfmt::skip] // for line breaks +const UNICODE_ARRAY: &[(char, &str, char)] = &[ + (' ', "Line Separator", ' '), + (' ', "Paragraph Separator", ' '), + (' ', "Ogham Space mark", ' '), + (' ', "En Quad", ' '), + (' ', "Em Quad", ' '), + (' ', "En Space", ' '), + (' ', "Em Space", ' '), + (' ', "Three-Per-Em Space", ' '), + (' ', "Four-Per-Em Space", ' '), + (' ', "Six-Per-Em Space", ' '), + (' ', "Punctuation Space", ' '), + (' ', "Thin Space", ' '), + (' ', "Hair Space", ' '), + (' ', "Medium Mathematical Space", ' '), + (' ', "No-Break Space", ' '), + (' ', "Figure Space", ' '), + (' ', "Narrow No-Break Space", ' '), + (' ', "Ideographic Space", ' '), + + ('ߺ', "Nko Lajanyalan", '_'), + ('﹍', "Dashed Low Line", '_'), + ('﹎', "Centreline Low Line", '_'), + ('﹏', "Wavy Low Line", '_'), + ('_', "Fullwidth Low 
Line", '_'), + + ('‐', "Hyphen", '-'), + ('‑', "Non-Breaking Hyphen", '-'), + ('‒', "Figure Dash", '-'), + ('–', "En Dash", '-'), + ('—', "Em Dash", '-'), + ('﹘', "Small Em Dash", '-'), + ('۔', "Arabic Full Stop", '-'), + ('⁃', "Hyphen Bullet", '-'), + ('˗', "Modifier Letter Minus Sign", '-'), + ('−', "Minus Sign", '-'), + ('➖', "Heavy Minus Sign", '-'), + ('Ⲻ', "Coptic Letter Dialect-P Ni", '-'), + ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'), + ('-', "Fullwidth Hyphen-Minus", '-'), + ('―', "Horizontal Bar", '-'), + ('─', "Box Drawings Light Horizontal", '-'), + ('━', "Box Drawings Heavy Horizontal", '-'), + ('㇐', "CJK Stroke H", '-'), + ('ꟷ', "Latin Epigraphic Letter Sideways I", '-'), + ('ᅳ', "Hangul Jungseong Eu", '-'), + ('ㅡ', "Hangul Letter Eu", '-'), + ('一', "CJK Unified Ideograph-4E00", '-'), + ('⼀', "Kangxi Radical One", '-'), + + ('؍', "Arabic Date Separator", ','), + ('٫', "Arabic Decimal Separator", ','), + ('‚', "Single Low-9 Quotation Mark", ','), + ('¸', "Cedilla", ','), + ('ꓹ', "Lisu Letter Tone Na Po", ','), + (',', "Fullwidth Comma", ','), + + (';', "Greek Question Mark", ';'), + (';', "Fullwidth Semicolon", ';'), + ('︔', "Presentation Form For Vertical Semicolon", ';'), + + ('ः', "Devanagari Sign Visarga", ':'), + ('ઃ', "Gujarati Sign Visarga", ':'), + (':', "Fullwidth Colon", ':'), + ('։', "Armenian Full Stop", ':'), + ('܃', "Syriac Supralinear Colon", ':'), + ('܄', "Syriac Sublinear Colon", ':'), + ('᛬', "Runic Multiple Punctuation", ':'), + ('︰', "Presentation Form For Vertical Two Dot Leader", ':'), + ('᠃', "Mongolian Full Stop", ':'), + ('᠉', "Mongolian Manchu Full Stop", ':'), + ('⁚', "Two Dot Punctuation", ':'), + ('׃', "Hebrew Punctuation Sof Pasuq", ':'), + ('˸', "Modifier Letter Raised Colon", ':'), + ('꞉', "Modifier Letter Colon", ':'), + ('∶', "Ratio", ':'), + ('ː', "Modifier Letter Triangular Colon", ':'), + ('ꓽ', "Lisu Letter Tone Mya Jeu", ':'), + ('︓', "Presentation Form For Vertical Colon", ':'), + + ('!', "Fullwidth 
Exclamation Mark", '!'), + ('ǃ', "Latin Letter Retroflex Click", '!'), + ('ⵑ', "Tifinagh Letter Tuareg Yang", '!'), + ('︕', "Presentation Form For Vertical Exclamation Mark", '!'), + + ('ʔ', "Latin Letter Glottal Stop", '?'), + ('Ɂ', "Latin Capital Letter Glottal Stop", '?'), + ('ॽ', "Devanagari Letter Glottal Stop", '?'), + ('Ꭾ', "Cherokee Letter He", '?'), + ('ꛫ', "Bamum Letter Ntuu", '?'), + ('?', "Fullwidth Question Mark", '?'), + ('︖', "Presentation Form For Vertical Question Mark", '?'), + + ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'), + ('․', "One Dot Leader", '.'), + ('܁', "Syriac Supralinear Full Stop", '.'), + ('܂', "Syriac Sublinear Full Stop", '.'), + ('꘎', "Vai Full Stop", '.'), + ('𐩐', "Kharoshthi Punctuation Dot", '.'), + ('٠', "Arabic-Indic Digit Zero", '.'), + ('۰', "Extended Arabic-Indic Digit Zero", '.'), + ('ꓸ', "Lisu Letter Tone Mya Ti", '.'), + ('·', "Middle Dot", '.'), + ('・', "Katakana Middle Dot", '.'), + ('・', "Halfwidth Katakana Middle Dot", '.'), + ('᛫', "Runic Single Punctuation", '.'), + ('·', "Greek Ano Teleia", '.'), + ('⸱', "Word Separator Middle Dot", '.'), + ('𐄁', "Aegean Word Separator Dot", '.'), + ('•', "Bullet", '.'), + ('‧', "Hyphenation Point", '.'), + ('∙', "Bullet Operator", '.'), + ('⋅', "Dot Operator", '.'), + ('ꞏ', "Latin Letter Sinological Dot", '.'), + ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'), + ('ᐧ', "Canadian Syllabics Final Middle Dot", '.'), + ('.', "Fullwidth Full Stop", '.'), + ('。', "Ideographic Full Stop", '.'), + ('︒', "Presentation Form For Vertical Ideographic Full Stop", '.'), + + ('՝', "Armenian Comma", '\''), + (''', "Fullwidth Apostrophe", '\''), + ('‘', "Left Single Quotation Mark", '\''), + ('’', "Right Single Quotation Mark", '\''), + ('‛', "Single High-Reversed-9 Quotation Mark", '\''), + ('′', "Prime", '\''), + ('‵', "Reversed Prime", '\''), + ('՚', "Armenian Apostrophe", '\''), + ('׳', "Hebrew Punctuation Geresh", '\''), + ('`', "Grave Accent", '\''), + ('`', "Greek Varia", 
'\''), + ('`', "Fullwidth Grave Accent", '\''), + ('´', "Acute Accent", '\''), + ('΄', "Greek Tonos", '\''), + ('´', "Greek Oxia", '\''), + ('᾽', "Greek Koronis", '\''), + ('᾿', "Greek Psili", '\''), + ('῾', "Greek Dasia", '\''), + ('ʹ', "Modifier Letter Prime", '\''), + ('ʹ', "Greek Numeral Sign", '\''), + ('ˈ', "Modifier Letter Vertical Line", '\''), + ('ˊ', "Modifier Letter Acute Accent", '\''), + ('ˋ', "Modifier Letter Grave Accent", '\''), + ('˴', "Modifier Letter Middle Grave Accent", '\''), + ('ʻ', "Modifier Letter Turned Comma", '\''), + ('ʽ', "Modifier Letter Reversed Comma", '\''), + ('ʼ', "Modifier Letter Apostrophe", '\''), + ('ʾ', "Modifier Letter Right Half Ring", '\''), + ('ꞌ', "Latin Small Letter Saltillo", '\''), + ('י', "Hebrew Letter Yod", '\''), + ('ߴ', "Nko High Tone Apostrophe", '\''), + ('ߵ', "Nko Low Tone Apostrophe", '\''), + ('ᑊ', "Canadian Syllabics West-Cree P", '\''), + ('ᛌ', "Runic Letter Short-Twig-Sol S", '\''), + ('𖽑', "Miao Sign Aspiration", '\''), + ('𖽒', "Miao Sign Reformed Voicing", '\''), + + ('᳓', "Vedic Sign Nihshvasa", '"'), + ('"', "Fullwidth Quotation Mark", '"'), + ('“', "Left Double Quotation Mark", '"'), + ('”', "Right Double Quotation Mark", '"'), + ('‟', "Double High-Reversed-9 Quotation Mark", '"'), + ('″', "Double Prime", '"'), + ('‶', "Reversed Double Prime", '"'), + ('〃', "Ditto Mark", '"'), + ('״', "Hebrew Punctuation Gershayim", '"'), + ('˝', "Double Acute Accent", '"'), + ('ʺ', "Modifier Letter Double Prime", '"'), + ('˶', "Modifier Letter Middle Double Acute Accent", '"'), + ('˵', "Modifier Letter Middle Double Grave Accent", '"'), + ('ˮ', "Modifier Letter Double Apostrophe", '"'), + ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'), + ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'), + ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'), + + ('(', "Fullwidth Left Parenthesis", '('), + ('❨', "Medium Left Parenthesis Ornament", '('), + ('﴾', "Ornate Left Parenthesis", '('), + + (')', 
"Fullwidth Right Parenthesis", ')'), + ('❩', "Medium Right Parenthesis Ornament", ')'), + ('﴿', "Ornate Right Parenthesis", ')'), + + ('[', "Fullwidth Left Square Bracket", '['), + ('❲', "Light Left Tortoise Shell Bracket Ornament", '['), + ('「', "Left Corner Bracket", '['), + ('『', "Left White Corner Bracket", '['), + ('【', "Left Black Lenticular Bracket", '['), + ('〔', "Left Tortoise Shell Bracket", '['), + ('〖', "Left White Lenticular Bracket", '['), + ('〘', "Left White Tortoise Shell Bracket", '['), + ('〚', "Left White Square Bracket", '['), + + (']', "Fullwidth Right Square Bracket", ']'), + ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'), + ('」', "Right Corner Bracket", ']'), + ('』', "Right White Corner Bracket", ']'), + ('】', "Right Black Lenticular Bracket", ']'), + ('〕', "Right Tortoise Shell Bracket", ']'), + ('〗', "Right White Lenticular Bracket", ']'), + ('〙', "Right White Tortoise Shell Bracket", ']'), + ('〛', "Right White Square Bracket", ']'), + + ('❴', "Medium Left Curly Bracket Ornament", '{'), + ('𝄔', "Musical Symbol Brace", '{'), + ('{', "Fullwidth Left Curly Bracket", '{'), + + ('❵', "Medium Right Curly Bracket Ornament", '}'), + ('}', "Fullwidth Right Curly Bracket", '}'), + + ('⁎', "Low Asterisk", '*'), + ('٭', "Arabic Five Pointed Star", '*'), + ('∗', "Asterisk Operator", '*'), + ('𐌟', "Old Italic Letter Ess", '*'), + ('*', "Fullwidth Asterisk", '*'), + + ('᜵', "Philippine Single Punctuation", '/'), + ('⁁', "Caret Insertion Point", '/'), + ('∕', "Division Slash", '/'), + ('⁄', "Fraction Slash", '/'), + ('╱', "Box Drawings Light Diagonal Upper Right To Lower Left", '/'), + ('⟋', "Mathematical Rising Diagonal", '/'), + ('⧸', "Big Solidus", '/'), + ('𝈺', "Greek Instrumental Notation Symbol-47", '/'), + ('㇓', "CJK Stroke Sp", '/'), + ('〳', "Vertical Kana Repeat Mark Upper Half", '/'), + ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), + ('ノ', "Katakana Letter No", '/'), + ('丿', "CJK Unified Ideograph-4E3F", '/'), + ('⼃', "Kangxi 
Radical Slash", '/'), + ('/', "Fullwidth Solidus", '/'), + + ('\', "Fullwidth Reverse Solidus", '\\'), + ('﹨', "Small Reverse Solidus", '\\'), + ('∖', "Set Minus", '\\'), + ('⟍', "Mathematical Falling Diagonal", '\\'), + ('⧵', "Reverse Solidus Operator", '\\'), + ('⧹', "Big Reverse Solidus", '\\'), + ('⧹', "Greek Vocal Notation Symbol-16", '\\'), + ('⧹', "Greek Instrumental Symbol-48", '\\'), + ('㇔', "CJK Stroke D", '\\'), + ('丶', "CJK Unified Ideograph-4E36", '\\'), + ('⼂', "Kangxi Radical Dot", '\\'), + ('、', "Ideographic Comma", '\\'), + ('ヽ', "Katakana Iteration Mark", '\\'), + + ('ꝸ', "Latin Small Letter Um", '&'), + ('&', "Fullwidth Ampersand", '&'), + + ('᛭', "Runic Cross Punctuation", '+'), + ('➕', "Heavy Plus Sign", '+'), + ('𐊛', "Lycian Letter H", '+'), + ('﬩', "Hebrew Letter Alternative Plus Sign", '+'), + ('+', "Fullwidth Plus Sign", '+'), + + ('‹', "Single Left-Pointing Angle Quotation Mark", '<'), + ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'), + ('˂', "Modifier Letter Left Arrowhead", '<'), + ('𝈶', "Greek Instrumental Symbol-40", '<'), + ('ᐸ', "Canadian Syllabics Pa", '<'), + ('ᚲ', "Runic Letter Kauna", '<'), + ('❬', "Medium Left-Pointing Angle Bracket Ornament", '<'), + ('⟨', "Mathematical Left Angle Bracket", '<'), + ('〈', "Left-Pointing Angle Bracket", '<'), + ('〈', "Left Angle Bracket", '<'), + ('㇛', "CJK Stroke Pd", '<'), + ('く', "Hiragana Letter Ku", '<'), + ('𡿨', "CJK Unified Ideograph-21FE8", '<'), + ('《', "Left Double Angle Bracket", '<'), + ('<', "Fullwidth Less-Than Sign", '<'), + + ('᐀', "Canadian Syllabics Hyphen", '='), + ('⹀', "Double Hyphen", '='), + ('゠', "Katakana-Hiragana Double Hyphen", '='), + ('꓿', "Lisu Punctuation Full Stop", '='), + ('=', "Fullwidth Equals Sign", '='), + + ('›', "Single Right-Pointing Angle Quotation Mark", '>'), + ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'), + ('˃', "Modifier Letter Right Arrowhead", '>'), + ('𝈷', "Greek Instrumental Symbol-42", '>'), + ('ᐳ', "Canadian 
Syllabics Po", '>'), + ('𖼿', "Miao Letter Archaic Zza", '>'), + ('❭', "Medium Right-Pointing Angle Bracket Ornament", '>'), + ('⟩', "Mathematical Right Angle Bracket", '>'), + ('〉', "Right-Pointing Angle Bracket", '>'), + ('〉', "Right Angle Bracket", '>'), + ('》', "Right Double Angle Bracket", '>'), + ('>', "Fullwidth Greater-Than Sign", '>'), +]; + +// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of +// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`. +// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add +// fancier error recovery to it, as there will be less overall work to do this way. +const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[ + (' ', "Space", Some(token::Whitespace)), + ('_', "Underscore", Some(token::Ident(kw::Underscore, false))), + ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))), + (',', "Comma", Some(token::Comma)), + (';', "Semicolon", Some(token::Semi)), + (':', "Colon", Some(token::Colon)), + ('!', "Exclamation Mark", Some(token::Not)), + ('?', "Question Mark", Some(token::Question)), + ('.', "Period", Some(token::Dot)), + ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))), + (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))), + ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))), + (']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))), + ('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))), + ('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))), + ('*', "Asterisk", Some(token::BinOp(token::Star))), + ('/', "Slash", Some(token::BinOp(token::Slash))), + ('\\', "Backslash", None), + ('&', "Ampersand", Some(token::BinOp(token::And))), + ('+', "Plus Sign", Some(token::BinOp(token::Plus))), + ('<', "Less-Than Sign", Some(token::Lt)), + ('=', "Equals Sign", Some(token::Eq)), + ('>', "Greater-Than Sign", 
Some(token::Gt)), + // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by + // spitting the correct token out. + ('\'', "Single Quote", None), + ('"', "Quotation Mark", None), +]; + +crate fn check_for_substitution<'a>( + reader: &StringReader<'a>, + pos: BytePos, + ch: char, + err: &mut DiagnosticBuilder<'a>, +) -> Option<token::TokenKind> { + let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) { + Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char), + None => return None, + }; + + let span = Span::with_root_ctxt(pos, pos + Pos::from_usize(ch.len_utf8())); + + let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) { + Some((_ascii_char, ascii_name, token)) => (ascii_name, token), + None => { + let msg = format!("substitution character not found for '{}'", ch); + reader.sess.span_diagnostic.span_bug_no_panic(span, &msg); + return None; + } + }; + + // special help suggestion for "directed" double quotes + if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') { + let msg = format!( + "Unicode characters '“' (Left Double Quotation Mark) and \ + '”' (Right Double Quotation Mark) look like '{}' ({}), but are not", + ascii_char, ascii_name + ); + err.span_suggestion( + Span::with_root_ctxt( + pos, + pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()), + ), + &msg, + format!("\"{}\"", s), + Applicability::MaybeIncorrect, + ); + } else { + let msg = format!( + "Unicode character '{}' ({}) looks like '{}' ({}), but it is not", + ch, u_name, ascii_char, ascii_name + ); + err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect); + } + token.clone() +} + +/// Extract string if found at current position with given delimiters +fn peek_delimited(text: &str, from_ch: char, to_ch: char) -> Option<&str> { + let mut chars = text.chars(); + let first_char = chars.next()?; + if first_char != from_ch { + return 
None; + } + let last_char_idx = chars.as_str().find(to_ch)?; + Some(&chars.as_str()[..last_char_idx]) +} diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs new file mode 100644 index 00000000000..bc857c97742 --- /dev/null +++ b/compiler/rustc_parse/src/lib.rs @@ -0,0 +1,595 @@ +//! The main parser interface. + +#![feature(bool_to_option)] +#![feature(crate_visibility_modifier)] +#![feature(bindings_after_at)] +#![feature(try_blocks)] +#![feature(or_patterns)] + +use rustc_ast as ast; +use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind}; +use rustc_ast::tokenstream::{self, IsJoint, TokenStream, TokenTree}; +use rustc_ast_pretty::pprust; +use rustc_data_structures::sync::Lrc; +use rustc_errors::{Diagnostic, FatalError, Level, PResult}; +use rustc_session::parse::ParseSess; +use rustc_span::{symbol::kw, FileName, SourceFile, Span, DUMMY_SP}; + +use smallvec::SmallVec; +use std::mem; +use std::path::Path; +use std::str; + +use tracing::{debug, info}; + +pub const MACRO_ARGUMENTS: Option<&'static str> = Some("macro arguments"); + +#[macro_use] +pub mod parser; +use parser::{emit_unclosed_delims, make_unclosed_delims_error, Parser}; +pub mod lexer; +pub mod validate_attr; + +// A bunch of utility functions of the form `parse_<thing>_from_<source>` +// where <thing> includes crate, expr, item, stmt, tts, and one that +// uses a HOF to parse anything, and <source> includes file and +// `source_str`. + +/// A variant of 'panictry!' that works on a Vec<Diagnostic> instead of a single DiagnosticBuilder. +macro_rules! 
panictry_buffer {
    ($diag_handler:expr, $result:expr) => {{
        use rustc_errors::FatalError;
        use std::result::Result::{Err, Ok};
        match $result {
            Err(diagnostics) => {
                for diagnostic in diagnostics {
                    $diag_handler.emit_diagnostic(&diagnostic);
                }
                FatalError.raise()
            }
            Ok(value) => value,
        }
    }};
}

/// Parses a whole crate (its module tree) from the file at `input`.
pub fn parse_crate_from_file<'a>(input: &Path, sess: &'a ParseSess) -> PResult<'a, ast::Crate> {
    // `parse_crate_mod` takes `&mut self`; calling it on the temporary is fine.
    new_parser_from_file(sess, input, None).parse_crate_mod()
}

/// Parses only the crate-level inner attributes from the file at `input`.
pub fn parse_crate_attrs_from_file<'a>(
    input: &Path,
    sess: &'a ParseSess,
) -> PResult<'a, Vec<ast::Attribute>> {
    new_parser_from_file(sess, input, None).parse_inner_attributes()
}

/// Parses a whole crate from an in-memory source string.
pub fn parse_crate_from_source_str(
    name: FileName,
    source: String,
    sess: &ParseSess,
) -> PResult<'_, ast::Crate> {
    let mut parser = new_parser_from_source_str(sess, name, source);
    parser.parse_crate_mod()
}

/// Parses only the crate-level inner attributes from an in-memory source string.
pub fn parse_crate_attrs_from_source_str(
    name: FileName,
    source: String,
    sess: &ParseSess,
) -> PResult<'_, Vec<ast::Attribute>> {
    let mut parser = new_parser_from_source_str(sess, name, source);
    parser.parse_inner_attributes()
}

/// Lexes an in-memory source string into a `TokenStream`, eagerly emitting any
/// unclosed-delimiter diagnostics that were buffered during lexing.
pub fn parse_stream_from_source_str(
    name: FileName,
    source: String,
    sess: &ParseSess,
    override_span: Option<Span>,
) -> TokenStream {
    let source_file = sess.source_map().new_source_file(name, source);
    let (stream, mut unclosed) = source_file_to_stream(sess, source_file, override_span);
    emit_unclosed_delims(&mut unclosed, &sess);
    stream
}

/// Creates a new parser from a source string, aborting (via `FatalError`) on
/// buffered lexer errors instead of returning them.
pub fn new_parser_from_source_str(sess: &ParseSess, name: FileName, source: String) -> Parser<'_> {
    panictry_buffer!(&sess.span_diagnostic, maybe_new_parser_from_source_str(sess, name, source))
}

/// Creates a new parser from a source string. Returns any buffered errors from lexing the initial
/// token stream.
+pub fn maybe_new_parser_from_source_str( + sess: &ParseSess, + name: FileName, + source: String, +) -> Result<Parser<'_>, Vec<Diagnostic>> { + maybe_source_file_to_parser(sess, sess.source_map().new_source_file(name, source)) +} + +/// Creates a new parser, handling errors as appropriate if the file doesn't exist. +/// If a span is given, that is used on an error as the as the source of the problem. +pub fn new_parser_from_file<'a>(sess: &'a ParseSess, path: &Path, sp: Option<Span>) -> Parser<'a> { + source_file_to_parser(sess, file_to_source_file(sess, path, sp)) +} + +/// Creates a new parser, returning buffered diagnostics if the file doesn't exist, +/// or from lexing the initial token stream. +pub fn maybe_new_parser_from_file<'a>( + sess: &'a ParseSess, + path: &Path, +) -> Result<Parser<'a>, Vec<Diagnostic>> { + let file = try_file_to_source_file(sess, path, None).map_err(|db| vec![db])?; + maybe_source_file_to_parser(sess, file) +} + +/// Given a `source_file` and config, returns a parser. +fn source_file_to_parser(sess: &ParseSess, source_file: Lrc<SourceFile>) -> Parser<'_> { + panictry_buffer!(&sess.span_diagnostic, maybe_source_file_to_parser(sess, source_file)) +} + +/// Given a `source_file` and config, return a parser. Returns any buffered errors from lexing the +/// initial token stream. +fn maybe_source_file_to_parser( + sess: &ParseSess, + source_file: Lrc<SourceFile>, +) -> Result<Parser<'_>, Vec<Diagnostic>> { + let end_pos = source_file.end_pos; + let (stream, unclosed_delims) = maybe_file_to_stream(sess, source_file, None)?; + let mut parser = stream_to_parser(sess, stream, None); + parser.unclosed_delims = unclosed_delims; + if parser.token == token::Eof { + parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt()); + } + + Ok(parser) +} + +// Must preserve old name for now, because `quote!` from the *existing* +// compiler expands into it. 
+pub fn new_parser_from_tts(sess: &ParseSess, tts: Vec<TokenTree>) -> Parser<'_> { + stream_to_parser(sess, tts.into_iter().collect(), crate::MACRO_ARGUMENTS) +} + +// Base abstractions + +/// Given a session and a path and an optional span (for error reporting), +/// add the path to the session's source_map and return the new source_file or +/// error when a file can't be read. +fn try_file_to_source_file( + sess: &ParseSess, + path: &Path, + spanopt: Option<Span>, +) -> Result<Lrc<SourceFile>, Diagnostic> { + sess.source_map().load_file(path).map_err(|e| { + let msg = format!("couldn't read {}: {}", path.display(), e); + let mut diag = Diagnostic::new(Level::Fatal, &msg); + if let Some(sp) = spanopt { + diag.set_span(sp); + } + diag + }) +} + +/// Given a session and a path and an optional span (for error reporting), +/// adds the path to the session's `source_map` and returns the new `source_file`. +fn file_to_source_file(sess: &ParseSess, path: &Path, spanopt: Option<Span>) -> Lrc<SourceFile> { + match try_file_to_source_file(sess, path, spanopt) { + Ok(source_file) => source_file, + Err(d) => { + sess.span_diagnostic.emit_diagnostic(&d); + FatalError.raise(); + } + } +} + +/// Given a `source_file`, produces a sequence of token trees. +pub fn source_file_to_stream( + sess: &ParseSess, + source_file: Lrc<SourceFile>, + override_span: Option<Span>, +) -> (TokenStream, Vec<lexer::UnmatchedBrace>) { + panictry_buffer!(&sess.span_diagnostic, maybe_file_to_stream(sess, source_file, override_span)) +} + +/// Given a source file, produces a sequence of token trees. Returns any buffered errors from +/// parsing the token stream. 
+pub fn maybe_file_to_stream( + sess: &ParseSess, + source_file: Lrc<SourceFile>, + override_span: Option<Span>, +) -> Result<(TokenStream, Vec<lexer::UnmatchedBrace>), Vec<Diagnostic>> { + let srdr = lexer::StringReader::new(sess, source_file, override_span); + let (token_trees, unmatched_braces) = srdr.into_token_trees(); + + match token_trees { + Ok(stream) => Ok((stream, unmatched_braces)), + Err(err) => { + let mut buffer = Vec::with_capacity(1); + err.buffer(&mut buffer); + // Not using `emit_unclosed_delims` to use `db.buffer` + for unmatched in unmatched_braces { + if let Some(err) = make_unclosed_delims_error(unmatched, &sess) { + err.buffer(&mut buffer); + } + } + Err(buffer) + } + } +} + +/// Given a stream and the `ParseSess`, produces a parser. +pub fn stream_to_parser<'a>( + sess: &'a ParseSess, + stream: TokenStream, + subparser_name: Option<&'static str>, +) -> Parser<'a> { + Parser::new(sess, stream, false, subparser_name) +} + +/// Runs the given subparser `f` on the tokens of the given `attr`'s item. +pub fn parse_in<'a, T>( + sess: &'a ParseSess, + tts: TokenStream, + name: &'static str, + mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, +) -> PResult<'a, T> { + let mut parser = Parser::new(sess, tts, false, Some(name)); + let result = f(&mut parser)?; + if parser.token != token::Eof { + parser.unexpected()?; + } + Ok(result) +} + +// NOTE(Centril): The following probably shouldn't be here but it acknowledges the +// fact that architecturally, we are using parsing (read on below to understand why). + +pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream { + // A `Nonterminal` is often a parsed AST item. At this point we now + // need to convert the parsed AST to an actual token stream, e.g. + // un-parse it basically. + // + // Unfortunately there's not really a great way to do that in a + // guaranteed lossless fashion right now. 
The fallback here is to just + // stringify the AST node and reparse it, but this loses all span + // information. + // + // As a result, some AST nodes are annotated with the token stream they + // came from. Here we attempt to extract these lossless token streams + // before we fall back to the stringification. + let tokens = match *nt { + Nonterminal::NtItem(ref item) => { + prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span) + } + Nonterminal::NtPat(ref pat) => pat.tokens.clone(), + Nonterminal::NtIdent(ident, is_raw) => { + Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into()) + } + Nonterminal::NtLifetime(ident) => { + Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into()) + } + Nonterminal::NtTT(ref tt) => Some(tt.clone().into()), + Nonterminal::NtExpr(ref expr) => { + if expr.tokens.is_none() { + debug!("missing tokens for expr {:?}", expr); + } + prepend_attrs(sess, &expr.attrs, expr.tokens.as_ref(), span) + } + _ => None, + }; + + // FIXME(#43081): Avoid this pretty-print + reparse hack + let source = pprust::nonterminal_to_string(nt); + let filename = FileName::macro_expansion_source_code(&source); + let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span)); + + // During early phases of the compiler the AST could get modified + // directly (e.g., attributes added or removed) and the internal cache + // of tokens my not be invalidated or updated. Consequently if the + // "lossless" token stream disagrees with our actual stringification + // (which has historically been much more battle-tested) then we go + // with the lossy stream anyway (losing span information). + // + // Note that the comparison isn't `==` here to avoid comparing spans, + // but it *also* is a "probable" equality which is a pretty weird + // definition. We mostly want to catch actual changes to the AST + // like a `#[cfg]` being processed or some weird `macro_rules!` + // expansion. 
+ // + // What we *don't* want to catch is the fact that a user-defined + // literal like `0xf` is stringified as `15`, causing the cached token + // stream to not be literal `==` token-wise (ignoring spans) to the + // token stream we got from stringification. + // + // Instead the "probably equal" check here is "does each token + // recursively have the same discriminant?" We basically don't look at + // the token values here and assume that such fine grained token stream + // modifications, including adding/removing typically non-semantic + // tokens such as extra braces and commas, don't happen. + if let Some(tokens) = tokens { + if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real, sess) { + return tokens; + } + info!( + "cached tokens found, but they're not \"probably equal\", \ + going with stringified version" + ); + info!("cached tokens: {:?}", tokens); + info!("reparsed tokens: {:?}", tokens_for_real); + } + tokens_for_real +} + +// See comments in `Nonterminal::to_tokenstream` for why we care about +// *probably* equal here rather than actual equality +// +// This is otherwise the same as `eq_unspanned`, only recursing with a +// different method. +pub fn tokenstream_probably_equal_for_proc_macro( + first: &TokenStream, + other: &TokenStream, + sess: &ParseSess, +) -> bool { + // When checking for `probably_eq`, we ignore certain tokens that aren't + // preserved in the AST. Because they are not preserved, the pretty + // printer arbitrarily adds or removes them when printing as token + // streams, making a comparison between a token stream generated from an + // AST and a token stream which was parsed into an AST more reliable. + fn semantic_tree(tree: &TokenTree) -> bool { + if let TokenTree::Token(token) = tree { + if let + // The pretty printer tends to add trailing commas to + // everything, and in particular, after struct fields. + | token::Comma + // The pretty printer emits `NoDelim` as whitespace. 
+ | token::OpenDelim(DelimToken::NoDelim) + | token::CloseDelim(DelimToken::NoDelim) + // The pretty printer collapses many semicolons into one. + | token::Semi + // The pretty printer collapses whitespace arbitrarily and can + // introduce whitespace from `NoDelim`. + | token::Whitespace + // The pretty printer can turn `$crate` into `::crate_name` + | token::ModSep = token.kind { + return false; + } + } + true + } + + // When comparing two `TokenStream`s, we ignore the `IsJoint` information. + // + // However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will + // use `Token.glue` on adjacent tokens with the proper `IsJoint`. + // Since we are ignoreing `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`) + // and its 'split'/'unglued' compoenents (e.g. `Gt, Gt`) are equivalent + // when determining if two `TokenStream`s are 'probably equal'. + // + // Therefore, we use `break_two_token_op` to convert all tokens + // to the 'unglued' form (if it exists). This ensures that two + // `TokenStream`s which differ only in how their tokens are glued + // will be considered 'probably equal', which allows us to keep spans. + // + // This is important when the original `TokenStream` contained + // extra spaces (e.g. `f :: < Vec < _ > > ( ) ;'). These extra spaces + // will be omitted when we pretty-print, which can cause the original + // and reparsed `TokenStream`s to differ in the assignment of `IsJoint`, + // leading to some tokens being 'glued' together in one stream but not + // the other. See #68489 for more details. + fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> { + // In almost all cases, we should have either zero or one levels + // of 'unglueing'. However, in some unusual cases, we may need + // to iterate breaking tokens mutliple times. 
For example: + // '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]' + let mut token_trees: SmallVec<[_; 2]>; + if let TokenTree::Token(token) = &tree { + let mut out = SmallVec::<[_; 2]>::new(); + out.push(token.clone()); + // Iterate to fixpoint: + // * We start off with 'out' containing our initial token, and `temp` empty + // * If we are able to break any tokens in `out`, then `out` will have + // at least one more element than 'temp', so we will try to break tokens + // again. + // * If we cannot break any tokens in 'out', we are done + loop { + let mut temp = SmallVec::<[_; 2]>::new(); + let mut changed = false; + + for token in out.into_iter() { + if let Some((first, second)) = token.kind.break_two_token_op() { + temp.push(Token::new(first, DUMMY_SP)); + temp.push(Token::new(second, DUMMY_SP)); + changed = true; + } else { + temp.push(token); + } + } + out = temp; + if !changed { + break; + } + } + token_trees = out.into_iter().map(TokenTree::Token).collect(); + } else { + token_trees = SmallVec::new(); + token_trees.push(tree); + } + token_trees.into_iter() + } + + let expand_nt = |tree: TokenTree| { + if let TokenTree::Token(Token { kind: TokenKind::Interpolated(nt), span }) = &tree { + // When checking tokenstreams for 'probable equality', we are comparing + // a captured (from parsing) `TokenStream` to a reparsed tokenstream. + // The reparsed Tokenstream will never have `None`-delimited groups, + // since they are only ever inserted as a result of macro expansion. + // Therefore, inserting a `None`-delimtied group here (when we + // convert a nested `Nonterminal` to a tokenstream) would cause + // a mismatch with the reparsed tokenstream. + // + // Note that we currently do not handle the case where the + // reparsed stream has a `Parenthesis`-delimited group + // inserted. This will cause a spurious mismatch: + // issue #75734 tracks resolving this. 
+ nt_to_tokenstream(nt, sess, *span).into_trees() + } else { + TokenStream::new(vec![(tree, IsJoint::NonJoint)]).into_trees() + } + }; + + // Break tokens after we expand any nonterminals, so that we break tokens + // that are produced as a result of nonterminal expansion. + let mut t1 = first.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens); + let mut t2 = other.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens); + for (t1, t2) in t1.by_ref().zip(t2.by_ref()) { + if !tokentree_probably_equal_for_proc_macro(&t1, &t2, sess) { + return false; + } + } + t1.next().is_none() && t2.next().is_none() +} + +// See comments in `Nonterminal::to_tokenstream` for why we care about +// *probably* equal here rather than actual equality +// +// This is otherwise the same as `eq_unspanned`, only recursing with a +// different method. +pub fn tokentree_probably_equal_for_proc_macro( + first: &TokenTree, + other: &TokenTree, + sess: &ParseSess, +) -> bool { + match (first, other) { + (TokenTree::Token(token), TokenTree::Token(token2)) => { + token_probably_equal_for_proc_macro(token, token2) + } + (TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => { + delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2, sess) + } + _ => false, + } +} + +// See comments in `Nonterminal::to_tokenstream` for why we care about +// *probably* equal here rather than actual equality +fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool { + if mem::discriminant(&first.kind) != mem::discriminant(&other.kind) { + return false; + } + use rustc_ast::token::TokenKind::*; + match (&first.kind, &other.kind) { + (&Eq, &Eq) + | (&Lt, &Lt) + | (&Le, &Le) + | (&EqEq, &EqEq) + | (&Ne, &Ne) + | (&Ge, &Ge) + | (&Gt, &Gt) + | (&AndAnd, &AndAnd) + | (&OrOr, &OrOr) + | (&Not, &Not) + | (&Tilde, &Tilde) + | (&At, &At) + | (&Dot, &Dot) + | (&DotDot, &DotDot) + | (&DotDotDot, &DotDotDot) + | (&DotDotEq, 
&DotDotEq) + | (&Comma, &Comma) + | (&Semi, &Semi) + | (&Colon, &Colon) + | (&ModSep, &ModSep) + | (&RArrow, &RArrow) + | (&LArrow, &LArrow) + | (&FatArrow, &FatArrow) + | (&Pound, &Pound) + | (&Dollar, &Dollar) + | (&Question, &Question) + | (&Whitespace, &Whitespace) + | (&Comment, &Comment) + | (&Eof, &Eof) => true, + + (&BinOp(a), &BinOp(b)) | (&BinOpEq(a), &BinOpEq(b)) => a == b, + + (&OpenDelim(a), &OpenDelim(b)) | (&CloseDelim(a), &CloseDelim(b)) => a == b, + + (&DocComment(a1, a2, a3), &DocComment(b1, b2, b3)) => a1 == b1 && a2 == b2 && a3 == b3, + + (&Shebang(a), &Shebang(b)) => a == b, + + (&Literal(a), &Literal(b)) => a == b, + + (&Lifetime(a), &Lifetime(b)) => a == b, + (&Ident(a, b), &Ident(c, d)) => { + b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate) + } + + (&Interpolated(..), &Interpolated(..)) => panic!("Unexpanded Interpolated!"), + + _ => panic!("forgot to add a token?"), + } +} + +fn prepend_attrs( + sess: &ParseSess, + attrs: &[ast::Attribute], + tokens: Option<&tokenstream::TokenStream>, + span: rustc_span::Span, +) -> Option<tokenstream::TokenStream> { + let tokens = tokens?; + if attrs.is_empty() { + return Some(tokens.clone()); + } + let mut builder = tokenstream::TokenStreamBuilder::new(); + for attr in attrs { + assert_eq!( + attr.style, + ast::AttrStyle::Outer, + "inner attributes should prevent cached tokens from existing" + ); + + let source = pprust::attribute_to_string(attr); + let macro_filename = FileName::macro_expansion_source_code(&source); + + let item = match attr.kind { + ast::AttrKind::Normal(ref item) => item, + ast::AttrKind::DocComment(..) 
=> { + let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span)); + builder.push(stream); + continue; + } + }; + + // synthesize # [ $path $tokens ] manually here + let mut brackets = tokenstream::TokenStreamBuilder::new(); + + // For simple paths, push the identifier directly + if item.path.segments.len() == 1 && item.path.segments[0].args.is_none() { + let ident = item.path.segments[0].ident; + let token = token::Ident(ident.name, ident.as_str().starts_with("r#")); + brackets.push(tokenstream::TokenTree::token(token, ident.span)); + + // ... and for more complicated paths, fall back to a reparse hack that + // should eventually be removed. + } else { + let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span)); + brackets.push(stream); + } + + brackets.push(item.args.outer_tokens()); + + // The span we list here for `#` and for `[ ... ]` are both wrong in + // that it encompasses more than each token, but it hopefully is "good + // enough" for now at least. 
+ builder.push(tokenstream::TokenTree::token(token::Pound, attr.span)); + let delim_span = tokenstream::DelimSpan::from_single(attr.span); + builder.push(tokenstream::TokenTree::Delimited( + delim_span, + token::DelimToken::Bracket, + brackets.build(), + )); + } + builder.push(tokens.clone()); + Some(builder.build()) +} diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs new file mode 100644 index 00000000000..4e4429e461f --- /dev/null +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -0,0 +1,304 @@ +use super::{Parser, PathStyle}; +use rustc_ast as ast; +use rustc_ast::attr; +use rustc_ast::token::{self, Nonterminal}; +use rustc_ast_pretty::pprust; +use rustc_errors::{error_code, PResult}; +use rustc_span::Span; + +use tracing::debug; + +#[derive(Debug)] +pub(super) enum InnerAttrPolicy<'a> { + Permitted, + Forbidden { reason: &'a str, saw_doc_comment: bool, prev_attr_sp: Option<Span> }, +} + +const DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG: &str = "an inner attribute is not \ + permitted in this context"; + +pub(super) const DEFAULT_INNER_ATTR_FORBIDDEN: InnerAttrPolicy<'_> = InnerAttrPolicy::Forbidden { + reason: DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG, + saw_doc_comment: false, + prev_attr_sp: None, +}; + +impl<'a> Parser<'a> { + /// Parses attributes that appear before an item. 
+ pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> { + let mut attrs: Vec<ast::Attribute> = Vec::new(); + let mut just_parsed_doc_comment = false; + loop { + debug!("parse_outer_attributes: self.token={:?}", self.token); + if self.check(&token::Pound) { + let inner_error_reason = if just_parsed_doc_comment { + "an inner attribute is not permitted following an outer doc comment" + } else if !attrs.is_empty() { + "an inner attribute is not permitted following an outer attribute" + } else { + DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG + }; + let inner_parse_policy = InnerAttrPolicy::Forbidden { + reason: inner_error_reason, + saw_doc_comment: just_parsed_doc_comment, + prev_attr_sp: attrs.last().map(|a| a.span), + }; + let attr = self.parse_attribute_with_inner_parse_policy(inner_parse_policy)?; + attrs.push(attr); + just_parsed_doc_comment = false; + } else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind { + let attr = attr::mk_doc_comment(comment_kind, attr_style, data, self.token.span); + if attr.style != ast::AttrStyle::Outer { + self.sess + .span_diagnostic + .struct_span_err_with_code( + self.token.span, + "expected outer doc comment", + error_code!(E0753), + ) + .note( + "inner doc comments like this (starting with \ + `//!` or `/*!`) can only appear before items", + ) + .emit(); + } + attrs.push(attr); + self.bump(); + just_parsed_doc_comment = true; + } else { + break; + } + } + Ok(attrs) + } + + /// Matches `attribute = # ! [ meta_item ]`. + /// + /// If `permit_inner` is `true`, then a leading `!` indicates an inner + /// attribute. 
    pub fn parse_attribute(&mut self, permit_inner: bool) -> PResult<'a, ast::Attribute> {
        debug!("parse_attribute: permit_inner={:?} self.token={:?}", permit_inner, self.token);
        // Translate the boolean into the richer policy type and delegate.
        let inner_parse_policy =
            if permit_inner { InnerAttrPolicy::Permitted } else { DEFAULT_INNER_ATTR_FORBIDDEN };
        self.parse_attribute_with_inner_parse_policy(inner_parse_policy)
    }

    /// The same as `parse_attribute`, except it takes in an `InnerAttrPolicy`
    /// that prescribes how to handle inner attributes.
    fn parse_attribute_with_inner_parse_policy(
        &mut self,
        inner_parse_policy: InnerAttrPolicy<'_>,
    ) -> PResult<'a, ast::Attribute> {
        debug!(
            "parse_attribute_with_inner_parse_policy: inner_parse_policy={:?} self.token={:?}",
            inner_parse_policy, self.token
        );
        let lo = self.token.span;
        let (span, item, style) = if self.eat(&token::Pound) {
            // After `#`, an optional `!` selects an inner attribute
            // (`#![...]`) rather than an outer one (`#[...]`).
            let style =
                if self.eat(&token::Not) { ast::AttrStyle::Inner } else { ast::AttrStyle::Outer };

            self.expect(&token::OpenDelim(token::Bracket))?;
            let item = self.parse_attr_item()?;
            self.expect(&token::CloseDelim(token::Bracket))?;
            // Span of the whole attribute, `#` through the closing `]`.
            let attr_sp = lo.to(self.prev_token.span);

            // Emit error if inner attribute is encountered and forbidden.
            // Note: this only emits a diagnostic; parsing continues and the
            // attribute is still constructed, for recovery.
            if style == ast::AttrStyle::Inner {
                self.error_on_forbidden_inner_attr(attr_sp, inner_parse_policy);
            }

            (attr_sp, item, style)
        } else {
            // Callers are expected to have checked for `#` already; anything
            // else is a hard error.
            let token_str = pprust::token_to_string(&self.token);
            let msg = &format!("expected `#`, found `{}`", token_str);
            return Err(self.struct_span_err(self.token.span, msg));
        };

        Ok(attr::mk_attr_from_item(style, item, span))
    }

    /// Emits the diagnostic mandated by `policy` for an inner attribute found
    /// at `attr_sp`. Does nothing when the policy is `Permitted`.
    pub(super) fn error_on_forbidden_inner_attr(&self, attr_sp: Span, policy: InnerAttrPolicy<'_>) {
        if let InnerAttrPolicy::Forbidden { reason, saw_doc_comment, prev_attr_sp } = policy {
            // Tailor the secondary label to whatever preceded this attribute.
            let prev_attr_note =
                if saw_doc_comment { "previous doc comment" } else { "previous outer attribute" };

            let mut diag = self.struct_span_err(attr_sp, reason);

            if let Some(prev_attr_sp) = prev_attr_sp {
                diag.span_label(attr_sp, "not permitted following an outer attribute")
                    .span_label(prev_attr_sp, prev_attr_note);
            }

            diag.note(
                "inner attributes, like `#![no_std]`, annotate the item enclosing them, \
                and are usually found at the beginning of source files. \
                Outer attributes, like `#[test]`, annotate the item following them.",
            )
            .emit();
        }
    }

    /// Parses an inner part of an attribute (the path and following tokens).
    /// The tokens must be either a delimited token stream, or empty token stream,
    /// or the "legacy" key-value form.
    /// PATH `(` TOKEN_STREAM `)`
    /// PATH `[` TOKEN_STREAM `]`
    /// PATH `{` TOKEN_STREAM `}`
    /// PATH
    /// PATH `=` UNSUFFIXED_LIT
    /// The delimiters or `=` are still put into the resulting token stream.
+ pub fn parse_attr_item(&mut self) -> PResult<'a, ast::AttrItem> { + let item = match self.token.kind { + token::Interpolated(ref nt) => match **nt { + Nonterminal::NtMeta(ref item) => Some(item.clone().into_inner()), + _ => None, + }, + _ => None, + }; + Ok(if let Some(item) = item { + self.bump(); + item + } else { + let path = self.parse_path(PathStyle::Mod)?; + let args = self.parse_attr_args()?; + ast::AttrItem { path, args } + }) + } + + /// Parses attributes that appear after the opening of an item. These should + /// be preceded by an exclamation mark, but we accept and warn about one + /// terminated by a semicolon. + /// + /// Matches `inner_attrs*`. + crate fn parse_inner_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> { + let mut attrs: Vec<ast::Attribute> = vec![]; + loop { + // Only try to parse if it is an inner attribute (has `!`). + if self.check(&token::Pound) && self.look_ahead(1, |t| t == &token::Not) { + let attr = self.parse_attribute(true)?; + assert_eq!(attr.style, ast::AttrStyle::Inner); + attrs.push(attr); + } else if let token::DocComment(comment_kind, attr_style, data) = self.token.kind { + // We need to get the position of this token before we bump. + let attr = attr::mk_doc_comment(comment_kind, attr_style, data, self.token.span); + if attr.style == ast::AttrStyle::Inner { + attrs.push(attr); + self.bump(); + } else { + break; + } + } else { + break; + } + } + Ok(attrs) + } + + crate fn parse_unsuffixed_lit(&mut self) -> PResult<'a, ast::Lit> { + let lit = self.parse_lit()?; + debug!("checking if {:?} is unusuffixed", lit); + + if !lit.kind.is_unsuffixed() { + self.struct_span_err(lit.span, "suffixed literals are not allowed in attributes") + .help( + "instead of using a suffixed literal (`1u8`, `1.0f32`, etc.), \ + use an unsuffixed version (`1`, `1.0`, etc.)", + ) + .emit(); + } + + Ok(lit) + } + + /// Parses `cfg_attr(pred, attr_item_list)` where `attr_item_list` is comma-delimited. 
+ pub fn parse_cfg_attr(&mut self) -> PResult<'a, (ast::MetaItem, Vec<(ast::AttrItem, Span)>)> { + let cfg_predicate = self.parse_meta_item()?; + self.expect(&token::Comma)?; + + // Presumably, the majority of the time there will only be one attr. + let mut expanded_attrs = Vec::with_capacity(1); + while self.token.kind != token::Eof { + let lo = self.token.span; + let item = self.parse_attr_item()?; + expanded_attrs.push((item, lo.to(self.prev_token.span))); + if !self.eat(&token::Comma) { + break; + } + } + + Ok((cfg_predicate, expanded_attrs)) + } + + /// Matches `COMMASEP(meta_item_inner)`. + crate fn parse_meta_seq_top(&mut self) -> PResult<'a, Vec<ast::NestedMetaItem>> { + // Presumably, the majority of the time there will only be one attr. + let mut nmis = Vec::with_capacity(1); + while self.token.kind != token::Eof { + nmis.push(self.parse_meta_item_inner()?); + if !self.eat(&token::Comma) { + break; + } + } + Ok(nmis) + } + + /// Matches the following grammar (per RFC 1559). + /// + /// meta_item : PATH ( '=' UNSUFFIXED_LIT | '(' meta_item_inner? ')' )? ; + /// meta_item_inner : (meta_item | UNSUFFIXED_LIT) (',' meta_item_inner)? ; + pub fn parse_meta_item(&mut self) -> PResult<'a, ast::MetaItem> { + let nt_meta = match self.token.kind { + token::Interpolated(ref nt) => match **nt { + token::NtMeta(ref e) => Some(e.clone()), + _ => None, + }, + _ => None, + }; + + if let Some(item) = nt_meta { + return match item.meta(item.path.span) { + Some(meta) => { + self.bump(); + Ok(meta) + } + None => self.unexpected(), + }; + } + + let lo = self.token.span; + let path = self.parse_path(PathStyle::Mod)?; + let kind = self.parse_meta_item_kind()?; + let span = lo.to(self.prev_token.span); + Ok(ast::MetaItem { path, kind, span }) + } + + crate fn parse_meta_item_kind(&mut self) -> PResult<'a, ast::MetaItemKind> { + Ok(if self.eat(&token::Eq) { + ast::MetaItemKind::NameValue(self.parse_unsuffixed_lit()?) 
+ } else if self.check(&token::OpenDelim(token::Paren)) { + // Matches `meta_seq = ( COMMASEP(meta_item_inner) )`. + let (list, _) = self.parse_paren_comma_seq(|p| p.parse_meta_item_inner())?; + ast::MetaItemKind::List(list) + } else { + ast::MetaItemKind::Word + }) + } + + /// Matches `meta_item_inner : (meta_item | UNSUFFIXED_LIT) ;`. + fn parse_meta_item_inner(&mut self) -> PResult<'a, ast::NestedMetaItem> { + match self.parse_unsuffixed_lit() { + Ok(lit) => return Ok(ast::NestedMetaItem::Literal(lit)), + Err(ref mut err) => err.cancel(), + } + + match self.parse_meta_item() { + Ok(mi) => return Ok(ast::NestedMetaItem::MetaItem(mi)), + Err(ref mut err) => err.cancel(), + } + + let found = pprust::token_to_string(&self.token); + let msg = format!("expected unsuffixed literal or identifier, found `{}`", found); + Err(self.struct_span_err(self.token.span, &msg)) + } +} diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs new file mode 100644 index 00000000000..12efe391fb9 --- /dev/null +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -0,0 +1,1643 @@ +use super::ty::AllowPlus; +use super::{BlockMode, Parser, PathStyle, SemiColonMode, SeqSep, TokenExpectType, TokenType}; + +use rustc_ast::ptr::P; +use rustc_ast::token::{self, Lit, LitKind, TokenKind}; +use rustc_ast::util::parser::AssocOp; +use rustc_ast::{ + self as ast, AngleBracketedArgs, AttrVec, BinOpKind, BindingMode, BlockCheckMode, Expr, + ExprKind, Item, ItemKind, Mutability, Param, Pat, PatKind, PathSegment, QSelf, Ty, TyKind, +}; +use rustc_ast_pretty::pprust; +use rustc_data_structures::fx::FxHashSet; +use rustc_errors::{pluralize, struct_span_err}; +use rustc_errors::{Applicability, DiagnosticBuilder, Handler, PResult}; +use rustc_span::source_map::Spanned; +use rustc_span::symbol::{kw, Ident}; +use rustc_span::{MultiSpan, Span, SpanSnippetError, DUMMY_SP}; + +use tracing::{debug, trace}; + +const TURBOFISH: &str = "use `::<...>` instead of 
`<...>` to specify type arguments"; + +/// Creates a placeholder argument. +pub(super) fn dummy_arg(ident: Ident) -> Param { + let pat = P(Pat { + id: ast::DUMMY_NODE_ID, + kind: PatKind::Ident(BindingMode::ByValue(Mutability::Not), ident, None), + span: ident.span, + tokens: None, + }); + let ty = Ty { kind: TyKind::Err, span: ident.span, id: ast::DUMMY_NODE_ID }; + Param { + attrs: AttrVec::default(), + id: ast::DUMMY_NODE_ID, + pat, + span: ident.span, + ty: P(ty), + is_placeholder: false, + } +} + +pub enum Error { + UselessDocComment, +} + +impl Error { + fn span_err(self, sp: impl Into<MultiSpan>, handler: &Handler) -> DiagnosticBuilder<'_> { + match self { + Error::UselessDocComment => { + let mut err = struct_span_err!( + handler, + sp, + E0585, + "found a documentation comment that doesn't document anything", + ); + err.help( + "doc comments must come before what they document, maybe a comment was \ + intended with `//`?", + ); + err + } + } + } +} + +pub(super) trait RecoverQPath: Sized + 'static { + const PATH_STYLE: PathStyle = PathStyle::Expr; + fn to_ty(&self) -> Option<P<Ty>>; + fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self; +} + +impl RecoverQPath for Ty { + const PATH_STYLE: PathStyle = PathStyle::Type; + fn to_ty(&self) -> Option<P<Ty>> { + Some(P(self.clone())) + } + fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self { + Self { span: path.span, kind: TyKind::Path(qself, path), id: ast::DUMMY_NODE_ID } + } +} + +impl RecoverQPath for Pat { + fn to_ty(&self) -> Option<P<Ty>> { + self.to_ty() + } + fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self { + Self { + span: path.span, + kind: PatKind::Path(qself, path), + id: ast::DUMMY_NODE_ID, + tokens: None, + } + } +} + +impl RecoverQPath for Expr { + fn to_ty(&self) -> Option<P<Ty>> { + self.to_ty() + } + fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self { + Self { + span: path.span, + kind: ExprKind::Path(qself, path), + attrs: AttrVec::new(), + id: 
ast::DUMMY_NODE_ID, + tokens: None, + } + } +} + +/// Control whether the closing delimiter should be consumed when calling `Parser::consume_block`. +crate enum ConsumeClosingDelim { + Yes, + No, +} + +impl<'a> Parser<'a> { + pub(super) fn span_fatal_err<S: Into<MultiSpan>>( + &self, + sp: S, + err: Error, + ) -> DiagnosticBuilder<'a> { + err.span_err(sp, self.diagnostic()) + } + + pub fn struct_span_err<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> DiagnosticBuilder<'a> { + self.sess.span_diagnostic.struct_span_err(sp, m) + } + + pub fn span_bug<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> ! { + self.sess.span_diagnostic.span_bug(sp, m) + } + + pub(super) fn diagnostic(&self) -> &'a Handler { + &self.sess.span_diagnostic + } + + pub(super) fn span_to_snippet(&self, span: Span) -> Result<String, SpanSnippetError> { + self.sess.source_map().span_to_snippet(span) + } + + pub(super) fn expected_ident_found(&self) -> DiagnosticBuilder<'a> { + let mut err = self.struct_span_err( + self.token.span, + &format!("expected identifier, found {}", super::token_descr(&self.token)), + ); + let valid_follow = &[ + TokenKind::Eq, + TokenKind::Colon, + TokenKind::Comma, + TokenKind::Semi, + TokenKind::ModSep, + TokenKind::OpenDelim(token::DelimToken::Brace), + TokenKind::OpenDelim(token::DelimToken::Paren), + TokenKind::CloseDelim(token::DelimToken::Brace), + TokenKind::CloseDelim(token::DelimToken::Paren), + ]; + match self.token.ident() { + Some((ident, false)) + if ident.is_raw_guess() + && self.look_ahead(1, |t| valid_follow.contains(&t.kind)) => + { + err.span_suggestion( + ident.span, + "you can escape reserved keywords to use them as identifiers", + format!("r#{}", ident.name), + Applicability::MaybeIncorrect, + ); + } + _ => {} + } + if let Some(token_descr) = super::token_descr_opt(&self.token) { + err.span_label(self.token.span, format!("expected identifier, found {}", token_descr)); + } else { + err.span_label(self.token.span, "expected identifier"); + if self.token == 
token::Comma && self.look_ahead(1, |t| t.is_ident()) { + err.span_suggestion( + self.token.span, + "remove this comma", + String::new(), + Applicability::MachineApplicable, + ); + } + } + err + } + + pub(super) fn expected_one_of_not_found( + &mut self, + edible: &[TokenKind], + inedible: &[TokenKind], + ) -> PResult<'a, bool /* recovered */> { + fn tokens_to_string(tokens: &[TokenType]) -> String { + let mut i = tokens.iter(); + // This might be a sign we need a connect method on `Iterator`. + let b = i.next().map_or(String::new(), |t| t.to_string()); + i.enumerate().fold(b, |mut b, (i, a)| { + if tokens.len() > 2 && i == tokens.len() - 2 { + b.push_str(", or "); + } else if tokens.len() == 2 && i == tokens.len() - 2 { + b.push_str(" or "); + } else { + b.push_str(", "); + } + b.push_str(&a.to_string()); + b + }) + } + + let mut expected = edible + .iter() + .map(|x| TokenType::Token(x.clone())) + .chain(inedible.iter().map(|x| TokenType::Token(x.clone()))) + .chain(self.expected_tokens.iter().cloned()) + .collect::<Vec<_>>(); + expected.sort_by_cached_key(|x| x.to_string()); + expected.dedup(); + let expect = tokens_to_string(&expected[..]); + let actual = super::token_descr(&self.token); + let (msg_exp, (label_sp, label_exp)) = if expected.len() > 1 { + let short_expect = if expected.len() > 6 { + format!("{} possible tokens", expected.len()) + } else { + expect.clone() + }; + ( + format!("expected one of {}, found {}", expect, actual), + (self.prev_token.span.shrink_to_hi(), format!("expected one of {}", short_expect)), + ) + } else if expected.is_empty() { + ( + format!("unexpected token: {}", actual), + (self.prev_token.span, "unexpected token after this".to_string()), + ) + } else { + ( + format!("expected {}, found {}", expect, actual), + (self.prev_token.span.shrink_to_hi(), format!("expected {}", expect)), + ) + }; + self.last_unexpected_token_span = Some(self.token.span); + let mut err = self.struct_span_err(self.token.span, &msg_exp); + let sp = if 
self.token == token::Eof { + // This is EOF; don't want to point at the following char, but rather the last token. + self.prev_token.span + } else { + label_sp + }; + match self.recover_closing_delimiter( + &expected + .iter() + .filter_map(|tt| match tt { + TokenType::Token(t) => Some(t.clone()), + _ => None, + }) + .collect::<Vec<_>>(), + err, + ) { + Err(e) => err = e, + Ok(recovered) => { + return Ok(recovered); + } + } + + if self.check_too_many_raw_str_terminators(&mut err) { + return Err(err); + } + + let sm = self.sess.source_map(); + if self.prev_token.span == DUMMY_SP { + // Account for macro context where the previous span might not be + // available to avoid incorrect output (#54841). + err.span_label(self.token.span, label_exp); + } else if !sm.is_multiline(self.token.span.shrink_to_hi().until(sp.shrink_to_lo())) { + // When the spans are in the same line, it means that the only content between + // them is whitespace, point at the found token in that case: + // + // X | () => { syntax error }; + // | ^^^^^ expected one of 8 possible tokens here + // + // instead of having: + // + // X | () => { syntax error }; + // | -^^^^^ unexpected token + // | | + // | expected one of 8 possible tokens here + err.span_label(self.token.span, label_exp); + } else { + err.span_label(sp, label_exp); + err.span_label(self.token.span, "unexpected token"); + } + self.maybe_annotate_with_ascription(&mut err, false); + Err(err) + } + + fn check_too_many_raw_str_terminators(&mut self, err: &mut DiagnosticBuilder<'_>) -> bool { + match (&self.prev_token.kind, &self.token.kind) { + ( + TokenKind::Literal(Lit { + kind: LitKind::StrRaw(n_hashes) | LitKind::ByteStrRaw(n_hashes), + .. 
+ }), + TokenKind::Pound, + ) => { + err.set_primary_message("too many `#` when terminating raw string"); + err.span_suggestion( + self.token.span, + "remove the extra `#`", + String::new(), + Applicability::MachineApplicable, + ); + err.note(&format!("the raw string started with {} `#`s", n_hashes)); + true + } + _ => false, + } + } + + pub fn maybe_annotate_with_ascription( + &mut self, + err: &mut DiagnosticBuilder<'_>, + maybe_expected_semicolon: bool, + ) { + if let Some((sp, likely_path)) = self.last_type_ascription.take() { + let sm = self.sess.source_map(); + let next_pos = sm.lookup_char_pos(self.token.span.lo()); + let op_pos = sm.lookup_char_pos(sp.hi()); + + let allow_unstable = self.sess.unstable_features.is_nightly_build(); + + if likely_path { + err.span_suggestion( + sp, + "maybe write a path separator here", + "::".to_string(), + if allow_unstable { + Applicability::MaybeIncorrect + } else { + Applicability::MachineApplicable + }, + ); + self.sess.type_ascription_path_suggestions.borrow_mut().insert(sp); + } else if op_pos.line != next_pos.line && maybe_expected_semicolon { + err.span_suggestion( + sp, + "try using a semicolon", + ";".to_string(), + Applicability::MaybeIncorrect, + ); + } else if allow_unstable { + err.span_label(sp, "tried to parse a type due to this type ascription"); + } else { + err.span_label(sp, "tried to parse a type due to this"); + } + if allow_unstable { + // Give extra information about type ascription only if it's a nightly compiler. + err.note( + "`#![feature(type_ascription)]` lets you annotate an expression with a type: \ + `<expr>: <type>`", + ); + if !likely_path { + // Avoid giving too much info when it was likely an unrelated typo. + err.note( + "see issue #23416 <https://github.com/rust-lang/rust/issues/23416> \ + for more information", + ); + } + } + } + } + + /// Eats and discards tokens until one of `kets` is encountered. Respects token trees, + /// passes through any errors encountered. 
Used for error recovery. + pub(super) fn eat_to_tokens(&mut self, kets: &[&TokenKind]) { + if let Err(ref mut err) = + self.parse_seq_to_before_tokens(kets, SeqSep::none(), TokenExpectType::Expect, |p| { + Ok(p.parse_token_tree()) + }) + { + err.cancel(); + } + } + + /// This function checks if there are trailing angle brackets and produces + /// a diagnostic to suggest removing them. + /// + /// ```ignore (diagnostic) + /// let _ = vec![1, 2, 3].into_iter().collect::<Vec<usize>>>>(); + /// ^^ help: remove extra angle brackets + /// ``` + /// + /// If `true` is returned, then trailing brackets were recovered, tokens were consumed + /// up until one of the tokens in 'end' was encountered, and an error was emitted. + pub(super) fn check_trailing_angle_brackets( + &mut self, + segment: &PathSegment, + end: &[&TokenKind], + ) -> bool { + // This function is intended to be invoked after parsing a path segment where there are two + // cases: + // + // 1. A specific token is expected after the path segment. + // eg. `x.foo(`, `x.foo::<u32>(` (parenthesis - method call), + // `Foo::`, or `Foo::<Bar>::` (mod sep - continued path). + // 2. No specific token is expected after the path segment. + // eg. `x.foo` (field access) + // + // This function is called after parsing `.foo` and before parsing the token `end` (if + // present). This includes any angle bracket arguments, such as `.foo::<u32>` or + // `Foo::<Bar>`. + + // We only care about trailing angle brackets if we previously parsed angle bracket + // arguments. 
This helps stop us incorrectly suggesting that extra angle brackets be + // removed in this case: + // + // `x.foo >> (3)` (where `x.foo` is a `u32` for example) + // + // This case is particularly tricky as we won't notice it just looking at the tokens - + // it will appear the same (in terms of upcoming tokens) as below (since the `::<u32>` will + // have already been parsed): + // + // `x.foo::<u32>>>(3)` + let parsed_angle_bracket_args = + segment.args.as_ref().map(|args| args.is_angle_bracketed()).unwrap_or(false); + + debug!( + "check_trailing_angle_brackets: parsed_angle_bracket_args={:?}", + parsed_angle_bracket_args, + ); + if !parsed_angle_bracket_args { + return false; + } + + // Keep the span at the start so we can highlight the sequence of `>` characters to be + // removed. + let lo = self.token.span; + + // We need to look-ahead to see if we have `>` characters without moving the cursor forward + // (since we might have the field access case and the characters we're eating are + // actual operators and not trailing characters - ie `x.foo >> 3`). + let mut position = 0; + + // We can encounter `>` or `>>` tokens in any order, so we need to keep track of how + // many of each (so we can correctly pluralize our error messages) and continue to + // advance. + let mut number_of_shr = 0; + let mut number_of_gt = 0; + while self.look_ahead(position, |t| { + trace!("check_trailing_angle_brackets: t={:?}", t); + if *t == token::BinOp(token::BinOpToken::Shr) { + number_of_shr += 1; + true + } else if *t == token::Gt { + number_of_gt += 1; + true + } else { + false + } + }) { + position += 1; + } + + // If we didn't find any trailing `>` characters, then we have nothing to error about. 
+ debug!( + "check_trailing_angle_brackets: number_of_gt={:?} number_of_shr={:?}", + number_of_gt, number_of_shr, + ); + if number_of_gt < 1 && number_of_shr < 1 { + return false; + } + + // Finally, double check that we have our end token as otherwise this is the + // second case. + if self.look_ahead(position, |t| { + trace!("check_trailing_angle_brackets: t={:?}", t); + end.contains(&&t.kind) + }) { + // Eat from where we started until the end token so that parsing can continue + // as if we didn't have those extra angle brackets. + self.eat_to_tokens(end); + let span = lo.until(self.token.span); + + let total_num_of_gt = number_of_gt + number_of_shr * 2; + self.struct_span_err( + span, + &format!("unmatched angle bracket{}", pluralize!(total_num_of_gt)), + ) + .span_suggestion( + span, + &format!("remove extra angle bracket{}", pluralize!(total_num_of_gt)), + String::new(), + Applicability::MachineApplicable, + ) + .emit(); + return true; + } + false + } + + /// Check if a method call with an intended turbofish has been written without surrounding + /// angle brackets. + pub(super) fn check_turbofish_missing_angle_brackets(&mut self, segment: &mut PathSegment) { + if token::ModSep == self.token.kind && segment.args.is_none() { + let snapshot = self.clone(); + self.bump(); + let lo = self.token.span; + match self.parse_angle_args() { + Ok(args) => { + let span = lo.to(self.prev_token.span); + // Detect trailing `>` like in `x.collect::Vec<_>>()`. + let mut trailing_span = self.prev_token.span.shrink_to_hi(); + while self.token.kind == token::BinOp(token::Shr) + || self.token.kind == token::Gt + { + trailing_span = trailing_span.to(self.token.span); + self.bump(); + } + if self.token.kind == token::OpenDelim(token::Paren) { + // Recover from bad turbofish: `foo.collect::Vec<_>()`. 
+ let args = AngleBracketedArgs { args, span }.into(); + segment.args = args; + + self.struct_span_err( + span, + "generic parameters without surrounding angle brackets", + ) + .multipart_suggestion( + "surround the type parameters with angle brackets", + vec![ + (span.shrink_to_lo(), "<".to_string()), + (trailing_span, ">".to_string()), + ], + Applicability::MachineApplicable, + ) + .emit(); + } else { + // This doesn't look like an invalid turbofish, can't recover parse state. + *self = snapshot; + } + } + Err(mut err) => { + // We could't parse generic parameters, unlikely to be a turbofish. Rely on + // generic parse error instead. + err.cancel(); + *self = snapshot; + } + } + } + } + + /// Check to see if a pair of chained operators looks like an attempt at chained comparison, + /// e.g. `1 < x <= 3`. If so, suggest either splitting the comparison into two, or + /// parenthesising the leftmost comparison. + fn attempt_chained_comparison_suggestion( + &mut self, + err: &mut DiagnosticBuilder<'_>, + inner_op: &Expr, + outer_op: &Spanned<AssocOp>, + ) -> bool /* advanced the cursor */ { + if let ExprKind::Binary(op, ref l1, ref r1) = inner_op.kind { + if let ExprKind::Field(_, ident) = l1.kind { + if ident.as_str().parse::<i32>().is_err() && !matches!(r1.kind, ExprKind::Lit(_)) { + // The parser has encountered `foo.bar<baz`, the likelihood of the turbofish + // suggestion being the only one to apply is high. + return false; + } + } + let mut enclose = |left: Span, right: Span| { + err.multipart_suggestion( + "parenthesize the comparison", + vec![ + (left.shrink_to_lo(), "(".to_string()), + (right.shrink_to_hi(), ")".to_string()), + ], + Applicability::MaybeIncorrect, + ); + }; + return match (op.node, &outer_op.node) { + // `x == y == z` + (BinOpKind::Eq, AssocOp::Equal) | + // `x < y < z` and friends. + (BinOpKind::Lt, AssocOp::Less | AssocOp::LessEqual) | + (BinOpKind::Le, AssocOp::LessEqual | AssocOp::Less) | + // `x > y > z` and friends. 
+ (BinOpKind::Gt, AssocOp::Greater | AssocOp::GreaterEqual) | + (BinOpKind::Ge, AssocOp::GreaterEqual | AssocOp::Greater) => { + let expr_to_str = |e: &Expr| { + self.span_to_snippet(e.span) + .unwrap_or_else(|_| pprust::expr_to_string(&e)) + }; + err.span_suggestion_verbose( + inner_op.span.shrink_to_hi(), + "split the comparison into two", + format!(" && {}", expr_to_str(&r1)), + Applicability::MaybeIncorrect, + ); + false // Keep the current parse behavior, where the AST is `(x < y) < z`. + } + // `x == y < z` + (BinOpKind::Eq, AssocOp::Less | AssocOp::LessEqual | AssocOp::Greater | AssocOp::GreaterEqual) => { + // Consume `z`/outer-op-rhs. + let snapshot = self.clone(); + match self.parse_expr() { + Ok(r2) => { + // We are sure that outer-op-rhs could be consumed, the suggestion is + // likely correct. + enclose(r1.span, r2.span); + true + } + Err(mut expr_err) => { + expr_err.cancel(); + *self = snapshot; + false + } + } + } + // `x > y == z` + (BinOpKind::Lt | BinOpKind::Le | BinOpKind::Gt | BinOpKind::Ge, AssocOp::Equal) => { + let snapshot = self.clone(); + // At this point it is always valid to enclose the lhs in parentheses, no + // further checks are necessary. + match self.parse_expr() { + Ok(_) => { + enclose(l1.span, r1.span); + true + } + Err(mut expr_err) => { + expr_err.cancel(); + *self = snapshot; + false + } + } + } + _ => false, + }; + } + false + } + + /// Produces an error if comparison operators are chained (RFC #558). + /// We only need to check the LHS, not the RHS, because all comparison ops have same + /// precedence (see `fn precedence`) and are left-associative (see `fn fixity`). + /// + /// This can also be hit if someone incorrectly writes `foo<bar>()` when they should have used + /// the turbofish (`foo::<bar>()`) syntax. We attempt some heuristic recovery if that is the + /// case. 
+ /// + /// Keep in mind that given that `outer_op.is_comparison()` holds and comparison ops are left + /// associative we can infer that we have: + /// + /// ```text + /// outer_op + /// / \ + /// inner_op r2 + /// / \ + /// l1 r1 + /// ``` + pub(super) fn check_no_chained_comparison( + &mut self, + inner_op: &Expr, + outer_op: &Spanned<AssocOp>, + ) -> PResult<'a, Option<P<Expr>>> { + debug_assert!( + outer_op.node.is_comparison(), + "check_no_chained_comparison: {:?} is not comparison", + outer_op.node, + ); + + let mk_err_expr = + |this: &Self, span| Ok(Some(this.mk_expr(span, ExprKind::Err, AttrVec::new()))); + + match inner_op.kind { + ExprKind::Binary(op, ref l1, ref r1) if op.node.is_comparison() => { + let mut err = self.struct_span_err( + vec![op.span, self.prev_token.span], + "comparison operators cannot be chained", + ); + + let suggest = |err: &mut DiagnosticBuilder<'_>| { + err.span_suggestion_verbose( + op.span.shrink_to_lo(), + TURBOFISH, + "::".to_string(), + Applicability::MaybeIncorrect, + ); + }; + + // Include `<` to provide this recommendation even in a case like + // `Foo<Bar<Baz<Qux, ()>>>` + if op.node == BinOpKind::Lt && outer_op.node == AssocOp::Less + || outer_op.node == AssocOp::Greater + { + if outer_op.node == AssocOp::Less { + let snapshot = self.clone(); + self.bump(); + // So far we have parsed `foo<bar<`, consume the rest of the type args. + let modifiers = + [(token::Lt, 1), (token::Gt, -1), (token::BinOp(token::Shr), -2)]; + self.consume_tts(1, &modifiers[..]); + + if !&[token::OpenDelim(token::Paren), token::ModSep] + .contains(&self.token.kind) + { + // We don't have `foo< bar >(` or `foo< bar >::`, so we rewind the + // parser and bail out. + *self = snapshot.clone(); + } + } + return if token::ModSep == self.token.kind { + // We have some certainty that this was a bad turbofish at this point. 
+ // `foo< bar >::` + suggest(&mut err); + + let snapshot = self.clone(); + self.bump(); // `::` + + // Consume the rest of the likely `foo<bar>::new()` or return at `foo<bar>`. + match self.parse_expr() { + Ok(_) => { + // 99% certain that the suggestion is correct, continue parsing. + err.emit(); + // FIXME: actually check that the two expressions in the binop are + // paths and resynthesize new fn call expression instead of using + // `ExprKind::Err` placeholder. + mk_err_expr(self, inner_op.span.to(self.prev_token.span)) + } + Err(mut expr_err) => { + expr_err.cancel(); + // Not entirely sure now, but we bubble the error up with the + // suggestion. + *self = snapshot; + Err(err) + } + } + } else if token::OpenDelim(token::Paren) == self.token.kind { + // We have high certainty that this was a bad turbofish at this point. + // `foo< bar >(` + suggest(&mut err); + // Consume the fn call arguments. + match self.consume_fn_args() { + Err(()) => Err(err), + Ok(()) => { + err.emit(); + // FIXME: actually check that the two expressions in the binop are + // paths and resynthesize new fn call expression instead of using + // `ExprKind::Err` placeholder. + mk_err_expr(self, inner_op.span.to(self.prev_token.span)) + } + } + } else { + if !matches!(l1.kind, ExprKind::Lit(_)) + && !matches!(r1.kind, ExprKind::Lit(_)) + { + // All we know is that this is `foo < bar >` and *nothing* else. Try to + // be helpful, but don't attempt to recover. + err.help(TURBOFISH); + err.help("or use `(...)` if you meant to specify fn arguments"); + } + + // If it looks like a genuine attempt to chain operators (as opposed to a + // misformatted turbofish, for instance), suggest a correct form. + if self.attempt_chained_comparison_suggestion(&mut err, inner_op, outer_op) + { + err.emit(); + mk_err_expr(self, inner_op.span.to(self.prev_token.span)) + } else { + // These cases cause too many knock-down errors, bail out (#61329). 
+ Err(err) + } + }; + } + let recover = + self.attempt_chained_comparison_suggestion(&mut err, inner_op, outer_op); + err.emit(); + if recover { + return mk_err_expr(self, inner_op.span.to(self.prev_token.span)); + } + } + _ => {} + } + Ok(None) + } + + fn consume_fn_args(&mut self) -> Result<(), ()> { + let snapshot = self.clone(); + self.bump(); // `(` + + // Consume the fn call arguments. + let modifiers = + [(token::OpenDelim(token::Paren), 1), (token::CloseDelim(token::Paren), -1)]; + self.consume_tts(1, &modifiers[..]); + + if self.token.kind == token::Eof { + // Not entirely sure that what we consumed were fn arguments, rollback. + *self = snapshot; + Err(()) + } else { + // 99% certain that the suggestion is correct, continue parsing. + Ok(()) + } + } + + pub(super) fn maybe_report_ambiguous_plus( + &mut self, + allow_plus: AllowPlus, + impl_dyn_multi: bool, + ty: &Ty, + ) { + if matches!(allow_plus, AllowPlus::No) && impl_dyn_multi { + let sum_with_parens = format!("({})", pprust::ty_to_string(&ty)); + self.struct_span_err(ty.span, "ambiguous `+` in a type") + .span_suggestion( + ty.span, + "use parentheses to disambiguate", + sum_with_parens, + Applicability::MachineApplicable, + ) + .emit(); + } + } + + pub(super) fn maybe_recover_from_bad_type_plus( + &mut self, + allow_plus: AllowPlus, + ty: &Ty, + ) -> PResult<'a, ()> { + // Do not add `+` to expected tokens. 
+ if matches!(allow_plus, AllowPlus::No) || !self.token.is_like_plus() { + return Ok(()); + } + + self.bump(); // `+` + let bounds = self.parse_generic_bounds(None)?; + let sum_span = ty.span.to(self.prev_token.span); + + let mut err = struct_span_err!( + self.sess.span_diagnostic, + sum_span, + E0178, + "expected a path on the left-hand side of `+`, not `{}`", + pprust::ty_to_string(ty) + ); + + match ty.kind { + TyKind::Rptr(ref lifetime, ref mut_ty) => { + let sum_with_parens = pprust::to_string(|s| { + s.s.word("&"); + s.print_opt_lifetime(lifetime); + s.print_mutability(mut_ty.mutbl, false); + s.popen(); + s.print_type(&mut_ty.ty); + s.print_type_bounds(" +", &bounds); + s.pclose() + }); + err.span_suggestion( + sum_span, + "try adding parentheses", + sum_with_parens, + Applicability::MachineApplicable, + ); + } + TyKind::Ptr(..) | TyKind::BareFn(..) => { + err.span_label(sum_span, "perhaps you forgot parentheses?"); + } + _ => { + err.span_label(sum_span, "expected a path"); + } + } + err.emit(); + Ok(()) + } + + /// Tries to recover from associated item paths like `[T]::AssocItem` / `(T, U)::AssocItem`. + /// Attempts to convert the base expression/pattern/type into a type, parses the `::AssocItem` + /// tail, and combines them into a `<Ty>::AssocItem` expression/pattern/type. + pub(super) fn maybe_recover_from_bad_qpath<T: RecoverQPath>( + &mut self, + base: P<T>, + allow_recovery: bool, + ) -> PResult<'a, P<T>> { + // Do not add `::` to expected tokens. + if allow_recovery && self.token == token::ModSep { + if let Some(ty) = base.to_ty() { + return self.maybe_recover_from_bad_qpath_stage_2(ty.span, ty); + } + } + Ok(base) + } + + /// Given an already parsed `Ty`, parses the `::AssocItem` tail and + /// combines them into a `<Ty>::AssocItem` expression/pattern/type. 
+ pub(super) fn maybe_recover_from_bad_qpath_stage_2<T: RecoverQPath>( + &mut self, + ty_span: Span, + ty: P<Ty>, + ) -> PResult<'a, P<T>> { + self.expect(&token::ModSep)?; + + let mut path = ast::Path { segments: Vec::new(), span: DUMMY_SP }; + self.parse_path_segments(&mut path.segments, T::PATH_STYLE)?; + path.span = ty_span.to(self.prev_token.span); + + let ty_str = self.span_to_snippet(ty_span).unwrap_or_else(|_| pprust::ty_to_string(&ty)); + self.struct_span_err(path.span, "missing angle brackets in associated item path") + .span_suggestion( + // This is a best-effort recovery. + path.span, + "try", + format!("<{}>::{}", ty_str, pprust::path_to_string(&path)), + Applicability::MaybeIncorrect, + ) + .emit(); + + let path_span = ty_span.shrink_to_hi(); // Use an empty path since `position == 0`. + Ok(P(T::recovered(Some(QSelf { ty, path_span, position: 0 }), path))) + } + + pub(super) fn maybe_consume_incorrect_semicolon(&mut self, items: &[P<Item>]) -> bool { + if self.eat(&token::Semi) { + let mut err = self.struct_span_err(self.prev_token.span, "expected item, found `;`"); + err.span_suggestion_short( + self.prev_token.span, + "remove this semicolon", + String::new(), + Applicability::MachineApplicable, + ); + if !items.is_empty() { + let previous_item = &items[items.len() - 1]; + let previous_item_kind_name = match previous_item.kind { + // Say "braced struct" because tuple-structs and + // braceless-empty-struct declarations do take a semicolon. + ItemKind::Struct(..) => Some("braced struct"), + ItemKind::Enum(..) => Some("enum"), + ItemKind::Trait(..) => Some("trait"), + ItemKind::Union(..) => Some("union"), + _ => None, + }; + if let Some(name) = previous_item_kind_name { + err.help(&format!("{} declarations are not followed by a semicolon", name)); + } + } + err.emit(); + true + } else { + false + } + } + + /// Creates a `DiagnosticBuilder` for an unexpected token `t` and tries to recover if it is a + /// closing delimiter. 
+ pub(super) fn unexpected_try_recover( + &mut self, + t: &TokenKind, + ) -> PResult<'a, bool /* recovered */> { + let token_str = pprust::token_kind_to_string(t); + let this_token_str = super::token_descr(&self.token); + let (prev_sp, sp) = match (&self.token.kind, self.subparser_name) { + // Point at the end of the macro call when reaching end of macro arguments. + (token::Eof, Some(_)) => { + let sp = self.sess.source_map().next_point(self.token.span); + (sp, sp) + } + // We don't want to point at the following span after DUMMY_SP. + // This happens when the parser finds an empty TokenStream. + _ if self.prev_token.span == DUMMY_SP => (self.token.span, self.token.span), + // EOF, don't want to point at the following char, but rather the last token. + (token::Eof, None) => (self.prev_token.span, self.token.span), + _ => (self.prev_token.span.shrink_to_hi(), self.token.span), + }; + let msg = format!( + "expected `{}`, found {}", + token_str, + match (&self.token.kind, self.subparser_name) { + (token::Eof, Some(origin)) => format!("end of {}", origin), + _ => this_token_str, + }, + ); + let mut err = self.struct_span_err(sp, &msg); + let label_exp = format!("expected `{}`", token_str); + match self.recover_closing_delimiter(&[t.clone()], err) { + Err(e) => err = e, + Ok(recovered) => { + return Ok(recovered); + } + } + let sm = self.sess.source_map(); + if !sm.is_multiline(prev_sp.until(sp)) { + // When the spans are in the same line, it means that the only content + // between them is whitespace, point only at the found token. 
+ err.span_label(sp, label_exp); + } else { + err.span_label(prev_sp, label_exp); + err.span_label(sp, "unexpected token"); + } + Err(err) + } + + pub(super) fn expect_semi(&mut self) -> PResult<'a, ()> { + if self.eat(&token::Semi) { + return Ok(()); + } + let sm = self.sess.source_map(); + let msg = format!("expected `;`, found {}", super::token_descr(&self.token)); + let appl = Applicability::MachineApplicable; + if self.token.span == DUMMY_SP || self.prev_token.span == DUMMY_SP { + // Likely inside a macro, can't provide meaningful suggestions. + return self.expect(&token::Semi).map(drop); + } else if !sm.is_multiline(self.prev_token.span.until(self.token.span)) { + // The current token is in the same line as the prior token, not recoverable. + } else if [token::Comma, token::Colon].contains(&self.token.kind) + && self.prev_token.kind == token::CloseDelim(token::Paren) + { + // Likely typo: The current token is on a new line and is expected to be + // `.`, `;`, `?`, or an operator after a close delimiter token. + // + // let a = std::process::Command::new("echo") + // .arg("1") + // ,arg("2") + // ^ + // https://github.com/rust-lang/rust/issues/72253 + self.expect(&token::Semi)?; + return Ok(()); + } else if self.look_ahead(1, |t| { + t == &token::CloseDelim(token::Brace) || t.can_begin_expr() && t.kind != token::Colon + }) && [token::Comma, token::Colon].contains(&self.token.kind) + { + // Likely typo: `,` → `;` or `:` → `;`. This is triggered if the current token is + // either `,` or `:`, and the next token could either start a new statement or is a + // block close. 
For example: + // + // let x = 32: + // let y = 42; + self.bump(); + let sp = self.prev_token.span; + self.struct_span_err(sp, &msg) + .span_suggestion_short(sp, "change this to `;`", ";".to_string(), appl) + .emit(); + return Ok(()); + } else if self.look_ahead(0, |t| { + t == &token::CloseDelim(token::Brace) + || ( + t.can_begin_expr() && t != &token::Semi && t != &token::Pound + // Avoid triggering with too many trailing `#` in raw string. + ) + }) { + // Missing semicolon typo. This is triggered if the next token could either start a + // new statement or is a block close. For example: + // + // let x = 32 + // let y = 42; + let sp = self.prev_token.span.shrink_to_hi(); + self.struct_span_err(sp, &msg) + .span_label(self.token.span, "unexpected token") + .span_suggestion_short(sp, "add `;` here", ";".to_string(), appl) + .emit(); + return Ok(()); + } + self.expect(&token::Semi).map(drop) // Error unconditionally + } + + /// Consumes alternative await syntaxes like `await!(<expr>)`, `await <expr>`, + /// `await? <expr>`, `await(<expr>)`, and `await { <expr> }`. + pub(super) fn recover_incorrect_await_syntax( + &mut self, + lo: Span, + await_sp: Span, + attrs: AttrVec, + ) -> PResult<'a, P<Expr>> { + let (hi, expr, is_question) = if self.token == token::Not { + // Handle `await!(<expr>)`. + self.recover_await_macro()? + } else { + self.recover_await_prefix(await_sp)? 
+ }; + let sp = self.error_on_incorrect_await(lo, hi, &expr, is_question); + let expr = self.mk_expr(lo.to(sp), ExprKind::Await(expr), attrs); + self.maybe_recover_from_bad_qpath(expr, true) + } + + fn recover_await_macro(&mut self) -> PResult<'a, (Span, P<Expr>, bool)> { + self.expect(&token::Not)?; + self.expect(&token::OpenDelim(token::Paren))?; + let expr = self.parse_expr()?; + self.expect(&token::CloseDelim(token::Paren))?; + Ok((self.prev_token.span, expr, false)) + } + + fn recover_await_prefix(&mut self, await_sp: Span) -> PResult<'a, (Span, P<Expr>, bool)> { + let is_question = self.eat(&token::Question); // Handle `await? <expr>`. + let expr = if self.token == token::OpenDelim(token::Brace) { + // Handle `await { <expr> }`. + // This needs to be handled separatedly from the next arm to avoid + // interpreting `await { <expr> }?` as `<expr>?.await`. + self.parse_block_expr(None, self.token.span, BlockCheckMode::Default, AttrVec::new()) + } else { + self.parse_expr() + } + .map_err(|mut err| { + err.span_label(await_sp, "while parsing this incorrect await expression"); + err + })?; + Ok((expr.span, expr, is_question)) + } + + fn error_on_incorrect_await(&self, lo: Span, hi: Span, expr: &Expr, is_question: bool) -> Span { + let expr_str = + self.span_to_snippet(expr.span).unwrap_or_else(|_| pprust::expr_to_string(&expr)); + let suggestion = format!("{}.await{}", expr_str, if is_question { "?" } else { "" }); + let sp = lo.to(hi); + let app = match expr.kind { + ExprKind::Try(_) => Applicability::MaybeIncorrect, // `await <expr>?` + _ => Applicability::MachineApplicable, + }; + self.struct_span_err(sp, "incorrect use of `await`") + .span_suggestion(sp, "`await` is a postfix operation", suggestion, app) + .emit(); + sp + } + + /// If encountering `future.await()`, consumes and emits an error. 
+ pub(super) fn recover_from_await_method_call(&mut self) { + if self.token == token::OpenDelim(token::Paren) + && self.look_ahead(1, |t| t == &token::CloseDelim(token::Paren)) + { + // future.await() + let lo = self.token.span; + self.bump(); // ( + let sp = lo.to(self.token.span); + self.bump(); // ) + self.struct_span_err(sp, "incorrect use of `await`") + .span_suggestion( + sp, + "`await` is not a method call, remove the parentheses", + String::new(), + Applicability::MachineApplicable, + ) + .emit(); + } + } + + pub(super) fn try_macro_suggestion(&mut self) -> PResult<'a, P<Expr>> { + let is_try = self.token.is_keyword(kw::Try); + let is_questionmark = self.look_ahead(1, |t| t == &token::Not); //check for ! + let is_open = self.look_ahead(2, |t| t == &token::OpenDelim(token::Paren)); //check for ( + + if is_try && is_questionmark && is_open { + let lo = self.token.span; + self.bump(); //remove try + self.bump(); //remove ! + let try_span = lo.to(self.token.span); //we take the try!( span + self.bump(); //remove ( + let is_empty = self.token == token::CloseDelim(token::Paren); //check if the block is empty + self.consume_block(token::Paren, ConsumeClosingDelim::No); //eat the block + let hi = self.token.span; + self.bump(); //remove ) + let mut err = self.struct_span_err(lo.to(hi), "use of deprecated `try` macro"); + err.note("in the 2018 edition `try` is a reserved keyword, and the `try!()` macro is deprecated"); + let prefix = if is_empty { "" } else { "alternatively, " }; + if !is_empty { + err.multipart_suggestion( + "you can use the `?` operator instead", + vec![(try_span, "".to_owned()), (hi, "?".to_owned())], + Applicability::MachineApplicable, + ); + } + err.span_suggestion(lo.shrink_to_lo(), &format!("{}you can still access the deprecated `try!()` macro using the \"raw identifier\" syntax", prefix), "r#".to_string(), Applicability::MachineApplicable); + err.emit(); + Ok(self.mk_expr_err(lo.to(hi))) + } else { + Err(self.expected_expression_found()) // 
The user isn't trying to invoke the try! macro + } + } + + /// Recovers a situation like `for ( $pat in $expr )` + /// and suggest writing `for $pat in $expr` instead. + /// + /// This should be called before parsing the `$block`. + pub(super) fn recover_parens_around_for_head( + &mut self, + pat: P<Pat>, + expr: &Expr, + begin_paren: Option<Span>, + ) -> P<Pat> { + match (&self.token.kind, begin_paren) { + (token::CloseDelim(token::Paren), Some(begin_par_sp)) => { + self.bump(); + + let pat_str = self + // Remove the `(` from the span of the pattern: + .span_to_snippet(pat.span.trim_start(begin_par_sp).unwrap()) + .unwrap_or_else(|_| pprust::pat_to_string(&pat)); + + self.struct_span_err(self.prev_token.span, "unexpected closing `)`") + .span_label(begin_par_sp, "opening `(`") + .span_suggestion( + begin_par_sp.to(self.prev_token.span), + "remove parenthesis in `for` loop", + format!("{} in {}", pat_str, pprust::expr_to_string(&expr)), + // With e.g. `for (x) in y)` this would replace `(x) in y)` + // with `x) in y)` which is syntactically invalid. + // However, this is prevented before we get here. + Applicability::MachineApplicable, + ) + .emit(); + + // Unwrap `(pat)` into `pat` to avoid the `unused_parens` lint. + pat.and_then(|pat| match pat.kind { + PatKind::Paren(pat) => pat, + _ => P(pat), + }) + } + _ => pat, + } + } + + pub(super) fn could_ascription_be_path(&self, node: &ast::ExprKind) -> bool { + (self.token == token::Lt && // `foo:<bar`, likely a typoed turbofish. + self.look_ahead(1, |t| t.is_ident() && !t.is_reserved_ident())) + || self.token.is_ident() && + match node { + // `foo::` → `foo:` or `foo.bar::` → `foo.bar:` + ast::ExprKind::Path(..) | ast::ExprKind::Field(..) 
=> true, + _ => false, + } && + !self.token.is_reserved_ident() && // v `foo:bar(baz)` + self.look_ahead(1, |t| t == &token::OpenDelim(token::Paren)) + || self.look_ahead(1, |t| t == &token::OpenDelim(token::Brace)) // `foo:bar {` + || self.look_ahead(1, |t| t == &token::Colon) && // `foo:bar::<baz` + self.look_ahead(2, |t| t == &token::Lt) && + self.look_ahead(3, |t| t.is_ident()) + || self.look_ahead(1, |t| t == &token::Colon) && // `foo:bar:baz` + self.look_ahead(2, |t| t.is_ident()) + || self.look_ahead(1, |t| t == &token::ModSep) + && (self.look_ahead(2, |t| t.is_ident()) || // `foo:bar::baz` + self.look_ahead(2, |t| t == &token::Lt)) // `foo:bar::<baz>` + } + + pub(super) fn recover_seq_parse_error( + &mut self, + delim: token::DelimToken, + lo: Span, + result: PResult<'a, P<Expr>>, + ) -> P<Expr> { + match result { + Ok(x) => x, + Err(mut err) => { + err.emit(); + // Recover from parse error, callers expect the closing delim to be consumed. + self.consume_block(delim, ConsumeClosingDelim::Yes); + self.mk_expr(lo.to(self.prev_token.span), ExprKind::Err, AttrVec::new()) + } + } + } + + pub(super) fn recover_closing_delimiter( + &mut self, + tokens: &[TokenKind], + mut err: DiagnosticBuilder<'a>, + ) -> PResult<'a, bool> { + let mut pos = None; + // We want to use the last closing delim that would apply. + for (i, unmatched) in self.unclosed_delims.iter().enumerate().rev() { + if tokens.contains(&token::CloseDelim(unmatched.expected_delim)) + && Some(self.token.span) > unmatched.unclosed_span + { + pos = Some(i); + } + } + match pos { + Some(pos) => { + // Recover and assume that the detected unclosed delimiter was meant for + // this location. Emit the diagnostic and act as if the delimiter was + // present for the parser's sake. + + // Don't attempt to recover from this unclosed delimiter more than once. 
+ let unmatched = self.unclosed_delims.remove(pos); + let delim = TokenType::Token(token::CloseDelim(unmatched.expected_delim)); + if unmatched.found_delim.is_none() { + // We encountered `Eof`, set this fact here to avoid complaining about missing + // `fn main()` when we found place to suggest the closing brace. + *self.sess.reached_eof.borrow_mut() = true; + } + + // We want to suggest the inclusion of the closing delimiter where it makes + // the most sense, which is immediately after the last token: + // + // {foo(bar {}} + // - ^ + // | | + // | help: `)` may belong here + // | + // unclosed delimiter + if let Some(sp) = unmatched.unclosed_span { + err.span_label(sp, "unclosed delimiter"); + } + // Backticks should be removed to apply suggestions. + let mut delim = delim.to_string(); + delim.retain(|c| c != '`'); + err.span_suggestion_short( + self.prev_token.span.shrink_to_hi(), + &format!("`{}` may belong here", delim), + delim, + Applicability::MaybeIncorrect, + ); + if unmatched.found_delim.is_none() { + // Encountered `Eof` when lexing blocks. Do not recover here to avoid knockdown + // errors which would be emitted elsewhere in the parser and let other error + // recovery consume the rest of the file. + Err(err) + } else { + err.emit(); + self.expected_tokens.clear(); // Reduce the number of errors. + Ok(true) + } + } + _ => Err(err), + } + } + + /// Eats tokens until we can be relatively sure we reached the end of the + /// statement. This is something of a best-effort heuristic. + /// + /// We terminate when we find an unmatched `}` (without consuming it). 
+ pub(super) fn recover_stmt(&mut self) { + self.recover_stmt_(SemiColonMode::Ignore, BlockMode::Ignore) + } + + /// If `break_on_semi` is `Break`, then we will stop consuming tokens after + /// finding (and consuming) a `;` outside of `{}` or `[]` (note that this is + /// approximate -- it can mean we break too early due to macros, but that + /// should only lead to sub-optimal recovery, not inaccurate parsing). + /// + /// If `break_on_block` is `Break`, then we will stop consuming tokens + /// after finding (and consuming) a brace-delimited block. + pub(super) fn recover_stmt_( + &mut self, + break_on_semi: SemiColonMode, + break_on_block: BlockMode, + ) { + let mut brace_depth = 0; + let mut bracket_depth = 0; + let mut in_block = false; + debug!("recover_stmt_ enter loop (semi={:?}, block={:?})", break_on_semi, break_on_block); + loop { + debug!("recover_stmt_ loop {:?}", self.token); + match self.token.kind { + token::OpenDelim(token::DelimToken::Brace) => { + brace_depth += 1; + self.bump(); + if break_on_block == BlockMode::Break && brace_depth == 1 && bracket_depth == 0 + { + in_block = true; + } + } + token::OpenDelim(token::DelimToken::Bracket) => { + bracket_depth += 1; + self.bump(); + } + token::CloseDelim(token::DelimToken::Brace) => { + if brace_depth == 0 { + debug!("recover_stmt_ return - close delim {:?}", self.token); + break; + } + brace_depth -= 1; + self.bump(); + if in_block && bracket_depth == 0 && brace_depth == 0 { + debug!("recover_stmt_ return - block end {:?}", self.token); + break; + } + } + token::CloseDelim(token::DelimToken::Bracket) => { + bracket_depth -= 1; + if bracket_depth < 0 { + bracket_depth = 0; + } + self.bump(); + } + token::Eof => { + debug!("recover_stmt_ return - Eof"); + break; + } + token::Semi => { + self.bump(); + if break_on_semi == SemiColonMode::Break + && brace_depth == 0 + && bracket_depth == 0 + { + debug!("recover_stmt_ return - Semi"); + break; + } + } + token::Comma + if break_on_semi == 
SemiColonMode::Comma + && brace_depth == 0 + && bracket_depth == 0 => + { + debug!("recover_stmt_ return - Semi"); + break; + } + _ => self.bump(), + } + } + } + + pub(super) fn check_for_for_in_in_typo(&mut self, in_span: Span) { + if self.eat_keyword(kw::In) { + // a common typo: `for _ in in bar {}` + self.struct_span_err(self.prev_token.span, "expected iterable, found keyword `in`") + .span_suggestion_short( + in_span.until(self.prev_token.span), + "remove the duplicated `in`", + String::new(), + Applicability::MachineApplicable, + ) + .emit(); + } + } + + pub(super) fn expected_semi_or_open_brace<T>(&mut self) -> PResult<'a, T> { + let token_str = super::token_descr(&self.token); + let msg = &format!("expected `;` or `{{`, found {}", token_str); + let mut err = self.struct_span_err(self.token.span, msg); + err.span_label(self.token.span, "expected `;` or `{`"); + Err(err) + } + + pub(super) fn eat_incorrect_doc_comment_for_param_type(&mut self) { + if let token::DocComment(..) = self.token.kind { + self.struct_span_err( + self.token.span, + "documentation comments cannot be applied to a function parameter's type", + ) + .span_label(self.token.span, "doc comments are not allowed here") + .emit(); + self.bump(); + } else if self.token == token::Pound + && self.look_ahead(1, |t| *t == token::OpenDelim(token::Bracket)) + { + let lo = self.token.span; + // Skip every token until next possible arg. 
+ while self.token != token::CloseDelim(token::Bracket) { + self.bump(); + } + let sp = lo.to(self.token.span); + self.bump(); + self.struct_span_err(sp, "attributes cannot be applied to a function parameter's type") + .span_label(sp, "attributes are not allowed here") + .emit(); + } + } + + pub(super) fn parameter_without_type( + &mut self, + err: &mut DiagnosticBuilder<'_>, + pat: P<ast::Pat>, + require_name: bool, + first_param: bool, + ) -> Option<Ident> { + // If we find a pattern followed by an identifier, it could be an (incorrect) + // C-style parameter declaration. + if self.check_ident() + && self.look_ahead(1, |t| *t == token::Comma || *t == token::CloseDelim(token::Paren)) + { + // `fn foo(String s) {}` + let ident = self.parse_ident().unwrap(); + let span = pat.span.with_hi(ident.span.hi()); + + err.span_suggestion( + span, + "declare the type after the parameter binding", + String::from("<identifier>: <type>"), + Applicability::HasPlaceholders, + ); + return Some(ident); + } else if let PatKind::Ident(_, ident, _) = pat.kind { + if require_name + && (self.token == token::Comma + || self.token == token::Lt + || self.token == token::CloseDelim(token::Paren)) + { + // `fn foo(a, b) {}`, `fn foo(a<x>, b<y>) {}` or `fn foo(usize, usize) {}` + if first_param { + err.span_suggestion( + pat.span, + "if this is a `self` type, give it a parameter name", + format!("self: {}", ident), + Applicability::MaybeIncorrect, + ); + } + // Avoid suggesting that `fn foo(HashMap<u32>)` is fixed with a change to + // `fn foo(HashMap: TypeName<u32>)`. 
                if self.token != token::Lt {
                    err.span_suggestion(
                        pat.span,
                        "if this is a parameter name, give it a type",
                        format!("{}: TypeName", ident),
                        Applicability::HasPlaceholders,
                    );
                }
                err.span_suggestion(
                    pat.span,
                    "if this is a type, explicitly ignore the parameter name",
                    format!("_: {}", ident),
                    Applicability::MachineApplicable,
                );
                err.note("anonymous parameters are removed in the 2018 edition (see RFC 1685)");

                // Don't attempt to recover by using the `X` in `X<Y>` as the parameter name.
                return if self.token == token::Lt { None } else { Some(ident) };
            }
        }
        None
    }

    /// Recovers from `pat: Ty` in a position where patterns are not allowed
    /// (methods without bodies), emitting E0642 and suggesting `_`.
    pub(super) fn recover_arg_parse(&mut self) -> PResult<'a, (P<ast::Pat>, P<ast::Ty>)> {
        let pat = self.parse_pat(Some("argument name"))?;
        self.expect(&token::Colon)?;
        let ty = self.parse_ty()?;

        struct_span_err!(
            self.diagnostic(),
            pat.span,
            E0642,
            "patterns aren't allowed in methods without bodies",
        )
        .span_suggestion_short(
            pat.span,
            "give this argument a name or use an underscore to ignore it",
            "_".to_owned(),
            Applicability::MachineApplicable,
        )
        .emit();

        // Pretend the pattern is `_`, to avoid duplicate errors from AST validation.
        let pat =
            P(Pat { kind: PatKind::Wild, span: pat.span, id: ast::DUMMY_NODE_ID, tokens: None });
        Ok((pat, ty))
    }

    /// Reports a `self` parameter that is not in first position and marks its
    /// type as `TyKind::Err` so later passes don't emit follow-on errors.
    pub(super) fn recover_bad_self_param(&mut self, mut param: Param) -> PResult<'a, Param> {
        let sp = param.pat.span;
        param.ty.kind = TyKind::Err;
        self.struct_span_err(sp, "unexpected `self` parameter in function")
            .span_label(sp, "must be the first parameter of an associated function")
            .emit();
        Ok(param)
    }

    /// Skips tokens until the matching closing delimiter for `delim` is found
    /// (tracking nesting); `consume_close` controls whether that closing
    /// delimiter itself is eaten or left for the caller.
    pub(super) fn consume_block(
        &mut self,
        delim: token::DelimToken,
        consume_close: ConsumeClosingDelim,
    ) {
        let mut brace_depth = 0;
        loop {
            if self.eat(&token::OpenDelim(delim)) {
                brace_depth += 1;
            } else if self.check(&token::CloseDelim(delim)) {
                if brace_depth == 0 {
                    if let ConsumeClosingDelim::Yes = consume_close {
                        // Some of the callers of this method expect to be able to parse the
                        // closing delimiter themselves, so we leave it alone. Otherwise we advance
                        // the parser.
                        self.bump();
                    }
                    return;
                } else {
                    self.bump();
                    brace_depth -= 1;
                    continue;
                }
            } else if self.token == token::Eof || self.eat(&token::CloseDelim(token::NoDelim)) {
                return;
            } else {
                self.bump();
            }
        }
    }

    /// Builds an "expected expression" diagnostic at the current token; when we
    /// hit EOF inside a subparser (e.g. a macro argument), mentions its origin.
    pub(super) fn expected_expression_found(&self) -> DiagnosticBuilder<'a> {
        let (span, msg) = match (&self.token.kind, self.subparser_name) {
            (&token::Eof, Some(origin)) => {
                let sp = self.sess.source_map().next_point(self.token.span);
                (sp, format!("expected expression, found end of {}", origin))
            }
            _ => (
                self.token.span,
                format!("expected expression, found {}", super::token_descr(&self.token),),
            ),
        };
        let mut err = self.struct_span_err(span, &msg);
        let sp = self.sess.source_map().start_point(self.token.span);
        if let Some(sp) = self.sess.ambiguous_block_expr_parse.borrow().get(&sp) {
            self.sess.expr_parentheses_needed(&mut err, *sp, None);
        }
        err.span_label(span, "expected expression");
        err
    }

    /// Swallows tokens until `acc` drops to zero or EOF; `modifier` maps token
    /// kinds to the (signed) delta each occurrence applies to `acc`.
    fn consume_tts(
        &mut self,
        mut acc: i64, // `i64` because malformed code can have more closing delims than opening.
        // Not using `FxHashMap` due to `token::TokenKind: !Eq + !Hash`.
        modifier: &[(token::TokenKind, i64)],
    ) {
        while acc > 0 {
            if let Some((_, val)) = modifier.iter().find(|(t, _)| *t == self.token.kind) {
                acc += *val;
            }
            if self.token.kind == token::Eof {
                break;
            }
            self.bump();
        }
    }

    /// Replace duplicated recovered parameters with `_` pattern to avoid unnecessary errors.
    ///
    /// This is necessary because at this point we don't know whether we parsed a function with
    /// anonymous parameters or a function with names but no types. In order to minimize
    /// unnecessary errors, we assume the parameters are in the shape of `fn foo(a, b, c)` where
    /// the parameters are *names* (so we don't emit errors about not being able to find `b` in
    /// the local scope), but if we find the same name multiple times, like in `fn foo(i8, i8)`,
    /// we deduplicate them to not complain about duplicated parameter names.
    pub(super) fn deduplicate_recovered_params_names(&self, fn_inputs: &mut Vec<Param>) {
        let mut seen_inputs = FxHashSet::default();
        for input in fn_inputs.iter_mut() {
            let opt_ident = if let (PatKind::Ident(_, ident, _), TyKind::Err) =
                (&input.pat.kind, &input.ty.kind)
            {
                Some(*ident)
            } else {
                None
            };
            if let Some(ident) = opt_ident {
                if seen_inputs.contains(&ident) {
                    input.pat.kind = PatKind::Wild;
                }
                seen_inputs.insert(ident);
            }
        }
    }
}
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
new file mode 100644
index 00000000000..f022c628fe2
--- /dev/null
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -0,0 +1,2293 @@
use super::pat::{GateOr, PARAM_EXPECTED};
use super::ty::{AllowPlus, RecoverQPath};
use super::{BlockMode, Parser, PathStyle, Restrictions, TokenType};
use super::{SemiColonMode, SeqSep, TokenExpectType};
use crate::maybe_recover_from_interpolated_ty_qpath;

use rustc_ast::ptr::P;
use rustc_ast::token::{self, Token, TokenKind};
use rustc_ast::util::classify;
use rustc_ast::util::literal::LitError;
use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity};
use rustc_ast::{self as ast, AttrStyle, AttrVec, CaptureBy, Field, Lit, UnOp, DUMMY_NODE_ID};
use rustc_ast::{AnonConst, BinOp, BinOpKind, FnDecl, FnRetTy, MacCall, Param, Ty, TyKind};
use rustc_ast::{Arm, Async, BlockCheckMode, Expr, ExprKind, Label, Movability, RangeLimits};
use rustc_ast_pretty::pprust;
use rustc_errors::{Applicability, DiagnosticBuilder, PResult};
use rustc_span::source_map::{self, Span, Spanned};
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use std::mem;
use tracing::debug;

/// Possibly accepts a `token::Interpolated` expression (a pre-parsed expression
/// dropped into the token stream, which happens while parsing the result of
/// macro expansion). Placement of these is not as complex as I feared it would
/// be. The important thing is to make sure that lookahead doesn't balk at
/// `token::Interpolated` tokens.
macro_rules! maybe_whole_expr {
    ($p:expr) => {
        if let token::Interpolated(nt) = &$p.token.kind {
            match &**nt {
                token::NtExpr(e) | token::NtLiteral(e) => {
                    let e = e.clone();
                    $p.bump();
                    return Ok(e);
                }
                token::NtPath(path) => {
                    let path = path.clone();
                    $p.bump();
                    return Ok($p.mk_expr(
                        $p.token.span,
                        ExprKind::Path(None, path),
                        AttrVec::new(),
                    ));
                }
                token::NtBlock(block) => {
                    let block = block.clone();
                    $p.bump();
                    return Ok($p.mk_expr(
                        $p.token.span,
                        ExprKind::Block(block, None),
                        AttrVec::new(),
                    ));
                }
                _ => {}
            };
        }
    };
}

/// State of the left-hand side of an associative expression: not parsed yet,
/// only its outer attributes parsed, or fully parsed.
#[derive(Debug)]
pub(super) enum LhsExpr {
    NotYetParsed,
    AttributesParsed(AttrVec),
    AlreadyParsed(P<Expr>),
}

impl From<Option<AttrVec>> for LhsExpr {
    /// Converts `Some(attrs)` into `LhsExpr::AttributesParsed(attrs)`
    /// and `None` into `LhsExpr::NotYetParsed`.
    ///
    /// This conversion does not allocate.
    fn from(o: Option<AttrVec>) -> Self {
        if let Some(attrs) = o { LhsExpr::AttributesParsed(attrs) } else { LhsExpr::NotYetParsed }
    }
}

impl From<P<Expr>> for LhsExpr {
    /// Converts the `expr: P<Expr>` into `LhsExpr::AlreadyParsed(expr)`.
    ///
    /// This conversion does not allocate.
    fn from(expr: P<Expr>) -> Self {
        LhsExpr::AlreadyParsed(expr)
    }
}

impl<'a> Parser<'a> {
    /// Parses an expression.
    #[inline]
    pub fn parse_expr(&mut self) -> PResult<'a, P<Expr>> {
        self.parse_expr_res(Restrictions::empty(), None)
    }

    /// Parses an expression and wraps it in an `AnonConst` (e.g. array lengths).
    pub(super) fn parse_anon_const_expr(&mut self) -> PResult<'a, AnonConst> {
        self.parse_expr().map(|value| AnonConst { id: DUMMY_NODE_ID, value })
    }

    /// Parses an expression, recovering from a lone `_` followed by `,`
    /// (as in `foo(_, _, _)`) by emitting the error and yielding `ExprKind::Err`.
    fn parse_expr_catch_underscore(&mut self) -> PResult<'a, P<Expr>> {
        match self.parse_expr() {
            Ok(expr) => Ok(expr),
            Err(mut err) => match self.token.ident() {
                Some((Ident { name: kw::Underscore, .. }, false))
                    if self.look_ahead(1, |t| t == &token::Comma) =>
                {
                    // Special-case handling of `foo(_, _, _)`
                    err.emit();
                    self.bump();
                    Ok(self.mk_expr(self.prev_token.span, ExprKind::Err, AttrVec::new()))
                }
                _ => Err(err),
            },
        }
    }

    /// Parses a sequence of expressions delimited by parentheses.
    fn parse_paren_expr_seq(&mut self) -> PResult<'a, Vec<P<Expr>>> {
        self.parse_paren_comma_seq(|p| p.parse_expr_catch_underscore()).map(|(r, _)| r)
    }

    /// Parses an expression, subject to the given restrictions.
    #[inline]
    pub(super) fn parse_expr_res(
        &mut self,
        r: Restrictions,
        already_parsed_attrs: Option<AttrVec>,
    ) -> PResult<'a, P<Expr>> {
        self.with_res(r, |this| this.parse_assoc_expr(already_parsed_attrs))
    }

    /// Parses an associative expression.
    ///
    /// This parses an expression accounting for associativity and precedence of the operators in
    /// the expression.
    #[inline]
    fn parse_assoc_expr(&mut self, already_parsed_attrs: Option<AttrVec>) -> PResult<'a, P<Expr>> {
        self.parse_assoc_expr_with(0, already_parsed_attrs.into())
    }

    /// Parses an associative expression with operators of at least `min_prec` precedence.
    pub(super) fn parse_assoc_expr_with(
        &mut self,
        min_prec: usize,
        lhs: LhsExpr,
    ) -> PResult<'a, P<Expr>> {
        let mut lhs = if let LhsExpr::AlreadyParsed(expr) = lhs {
            expr
        } else {
            let attrs = match lhs {
                LhsExpr::AttributesParsed(attrs) => Some(attrs),
                _ => None,
            };
            if [token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token.kind) {
                // A range with no LHS, e.g. `..x` — handled separately below.
                return self.parse_prefix_range_expr(attrs);
            } else {
                self.parse_prefix_expr(attrs)?
            }
        };
        let last_type_ascription_set = self.last_type_ascription.is_some();

        if !self.should_continue_as_assoc_expr(&lhs) {
            self.last_type_ascription = None;
            return Ok(lhs);
        }

        self.expected_tokens.push(TokenType::Operator);
        while let Some(op) = self.check_assoc_op() {
            // Adjust the span for interpolated LHS to point to the `$lhs` token
            // and not to what it refers to.
            let lhs_span = match self.prev_token.kind {
                TokenKind::Interpolated(..) => self.prev_token.span,
                _ => lhs.span,
            };

            let cur_op_span = self.token.span;
            let restrictions = if op.node.is_assign_like() {
                self.restrictions & Restrictions::NO_STRUCT_LITERAL
            } else {
                self.restrictions
            };
            let prec = op.node.precedence();
            if prec < min_prec {
                break;
            }
            // Check for deprecated `...` syntax
            if self.token == token::DotDotDot && op.node == AssocOp::DotDotEq {
                self.err_dotdotdot_syntax(self.token.span);
            }

            if self.token == token::LArrow {
                self.err_larrow_operator(self.token.span);
            }

            self.bump();
            if op.node.is_comparison() {
                if let Some(expr) = self.check_no_chained_comparison(&lhs, &op)? {
                    return Ok(expr);
                }
            }

            if (op.node == AssocOp::Equal || op.node == AssocOp::NotEqual)
                && self.token.kind == token::Eq
                && self.prev_token.span.hi() == self.token.span.lo()
            {
                // Look for JS' `===` and `!==` and recover 😇
                let sp = op.span.to(self.token.span);
                let sugg = match op.node {
                    AssocOp::Equal => "==",
                    AssocOp::NotEqual => "!=",
                    _ => unreachable!(),
                };
                self.struct_span_err(sp, &format!("invalid comparison operator `{}=`", sugg))
                    .span_suggestion_short(
                        sp,
                        &format!("`{s}=` is not a valid comparison operator, use `{s}`", s = sugg),
                        sugg.to_string(),
                        Applicability::MachineApplicable,
                    )
                    .emit();
                self.bump();
            }

            let op = op.node;
            // Special cases:
            if op == AssocOp::As {
                lhs = self.parse_assoc_op_cast(lhs, lhs_span, ExprKind::Cast)?;
                continue;
            } else if op == AssocOp::Colon {
                lhs = self.parse_assoc_op_ascribe(lhs, lhs_span)?;
                continue;
            } else if op == AssocOp::DotDot || op == AssocOp::DotDotEq {
                // If we didn’t have to handle `x..`/`x..=`, it would be pretty easy to
                // generalise it to the Fixity::None code.
                lhs = self.parse_range_expr(prec, lhs, op, cur_op_span)?;
                break;
            }

            let fixity = op.fixity();
            let prec_adjustment = match fixity {
                Fixity::Right => 0,
                Fixity::Left => 1,
                // We currently have no non-associative operators that are not handled above by
                // the special cases. The code is here only for future convenience.
                Fixity::None => 1,
            };
            let rhs = self.with_res(restrictions - Restrictions::STMT_EXPR, |this| {
                this.parse_assoc_expr_with(prec + prec_adjustment, LhsExpr::NotYetParsed)
            })?;

            // Make sure that the span of the parent node is larger than the span of lhs and rhs,
            // including the attributes.
            let lhs_span =
                lhs.attrs.iter().find(|a| a.style == AttrStyle::Outer).map_or(lhs_span, |a| a.span);
            let span = lhs_span.to(rhs.span);
            lhs = match op {
                AssocOp::Add
                | AssocOp::Subtract
                | AssocOp::Multiply
                | AssocOp::Divide
                | AssocOp::Modulus
                | AssocOp::LAnd
                | AssocOp::LOr
                | AssocOp::BitXor
                | AssocOp::BitAnd
                | AssocOp::BitOr
                | AssocOp::ShiftLeft
                | AssocOp::ShiftRight
                | AssocOp::Equal
                | AssocOp::Less
                | AssocOp::LessEqual
                | AssocOp::NotEqual
                | AssocOp::Greater
                | AssocOp::GreaterEqual => {
                    let ast_op = op.to_ast_binop().unwrap();
                    let binary = self.mk_binary(source_map::respan(cur_op_span, ast_op), lhs, rhs);
                    self.mk_expr(span, binary, AttrVec::new())
                }
                AssocOp::Assign => {
                    self.mk_expr(span, ExprKind::Assign(lhs, rhs, cur_op_span), AttrVec::new())
                }
                AssocOp::AssignOp(k) => {
                    let aop = match k {
                        token::Plus => BinOpKind::Add,
                        token::Minus => BinOpKind::Sub,
                        token::Star => BinOpKind::Mul,
                        token::Slash => BinOpKind::Div,
                        token::Percent => BinOpKind::Rem,
                        token::Caret => BinOpKind::BitXor,
                        token::And => BinOpKind::BitAnd,
                        token::Or => BinOpKind::BitOr,
                        token::Shl => BinOpKind::Shl,
                        token::Shr => BinOpKind::Shr,
                    };
                    let aopexpr = self.mk_assign_op(source_map::respan(cur_op_span, aop), lhs, rhs);
                    self.mk_expr(span, aopexpr, AttrVec::new())
                }
                AssocOp::As | AssocOp::Colon | AssocOp::DotDot | AssocOp::DotDotEq => {
                    self.span_bug(span, "AssocOp should have been handled by special case")
                }
            };

            if let Fixity::None = fixity {
                break;
            }
        }
        if last_type_ascription_set {
            self.last_type_ascription = None;
        }
        Ok(lhs)
    }

    /// Decides whether to keep consuming operators after `lhs`, handling the
    /// ambiguity between a block expression statement and a binary operand
    /// (e.g. `{ 42 } - 5`).
    fn should_continue_as_assoc_expr(&mut self, lhs: &Expr) -> bool {
        match (self.expr_is_complete(lhs), AssocOp::from_token(&self.token)) {
            // Semi-statement forms are odd:
            // See https://github.com/rust-lang/rust/issues/29071
            (true, None) => false,
            (false, _) => true, // Continue parsing the expression.
            // An exhaustive check is done in the following block, but these are checked first
            // because they *are* ambiguous but also reasonable looking incorrect syntax, so we
            // want to keep their span info to improve diagnostics in these cases in a later stage.
            (true, Some(AssocOp::Multiply)) | // `{ 42 } *foo = bar;` or `{ 42 } * 3`
            (true, Some(AssocOp::Subtract)) | // `{ 42 } -5`
            (true, Some(AssocOp::Add)) // `{ 42 } + 42
            // If the next token is a keyword, then the tokens above *are* unambiguously incorrect:
            // `if x { a } else { b } && if y { c } else { d }`
            if !self.look_ahead(1, |t| t.is_used_keyword()) => {
                // These cases are ambiguous and can't be identified in the parser alone.
                let sp = self.sess.source_map().start_point(self.token.span);
                self.sess.ambiguous_block_expr_parse.borrow_mut().insert(sp, lhs.span);
                false
            }
            (true, Some(AssocOp::LAnd)) => {
                // `{ 42 } &&x` (#61475) or `{ 42 } && if x { 1 } else { 0 }`. Separated from the
                // above due to #74233.
                // These cases are ambiguous and can't be identified in the parser alone.
                let sp = self.sess.source_map().start_point(self.token.span);
                self.sess.ambiguous_block_expr_parse.borrow_mut().insert(sp, lhs.span);
                false
            }
            (true, Some(ref op)) if !op.can_continue_expr_unambiguously() => false,
            (true, Some(_)) => {
                self.error_found_expr_would_be_stmt(lhs);
                true
            }
        }
    }

    /// We've found an expression that would be parsed as a statement,
    /// but the next token implies this should be parsed as an expression.
    /// For example: `if let Some(x) = x { x } else { 0 } / 2`.
    fn error_found_expr_would_be_stmt(&self, lhs: &Expr) {
        let mut err = self.struct_span_err(
            self.token.span,
            &format!("expected expression, found `{}`", pprust::token_to_string(&self.token),),
        );
        err.span_label(self.token.span, "expected expression");
        self.sess.expr_parentheses_needed(&mut err, lhs.span, Some(pprust::expr_to_string(&lhs)));
        err.emit();
    }

    /// Possibly translate the current token to an associative operator.
    /// The method does not advance the current token.
    ///
    /// Also performs recovery for `and` / `or` which are mistaken for `&&` and `||` respectively.
    fn check_assoc_op(&self) -> Option<Spanned<AssocOp>> {
        let (op, span) = match (AssocOp::from_token(&self.token), self.token.ident()) {
            (Some(op), _) => (op, self.token.span),
            (None, Some((Ident { name: sym::and, span }, false))) => {
                self.error_bad_logical_op("and", "&&", "conjunction");
                (AssocOp::LAnd, span)
            }
            (None, Some((Ident { name: sym::or, span }, false))) => {
                self.error_bad_logical_op("or", "||", "disjunction");
                (AssocOp::LOr, span)
            }
            _ => return None,
        };
        Some(source_map::respan(span, op))
    }

    /// Error on `and` and `or` suggesting `&&` and `||` respectively.
    fn error_bad_logical_op(&self, bad: &str, good: &str, english: &str) {
        self.struct_span_err(self.token.span, &format!("`{}` is not a logical operator", bad))
            .span_suggestion_short(
                self.token.span,
                &format!("use `{}` to perform logical {}", good, english),
                good.to_string(),
                Applicability::MachineApplicable,
            )
            .note("unlike in e.g., python and PHP, `&&` and `||` are used for logical operators")
            .emit();
    }

    /// Checks if this expression is a successfully parsed statement.
    fn expr_is_complete(&self, e: &Expr) -> bool {
        self.restrictions.contains(Restrictions::STMT_EXPR)
            && !classify::expr_requires_semi_to_be_stmt(e)
    }

    /// Parses `x..y`, `x..=y`, and `x..`/`x..=`.
    /// The other two variants are handled in `parse_prefix_range_expr` below.
    fn parse_range_expr(
        &mut self,
        prec: usize,
        lhs: P<Expr>,
        op: AssocOp,
        cur_op_span: Span,
    ) -> PResult<'a, P<Expr>> {
        let rhs = if self.is_at_start_of_range_notation_rhs() {
            Some(self.parse_assoc_expr_with(prec + 1, LhsExpr::NotYetParsed)?)
        } else {
            None
        };
        let rhs_span = rhs.as_ref().map_or(cur_op_span, |x| x.span);
        let span = lhs.span.to(rhs_span);
        let limits =
            if op == AssocOp::DotDot { RangeLimits::HalfOpen } else { RangeLimits::Closed };
        Ok(self.mk_expr(span, self.mk_range(Some(lhs), rhs, limits)?, AttrVec::new()))
    }

    /// Whether the current token can begin the RHS of a range expression.
    fn is_at_start_of_range_notation_rhs(&self) -> bool {
        if self.token.can_begin_expr() {
            // Parse `for i in 1.. { }` as infinite loop, not as `for i in (1..{})`.
            if self.token == token::OpenDelim(token::Brace) {
                return !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL);
            }
            true
        } else {
            false
        }
    }

    /// Parses prefix-forms of range notation: `..expr`, `..`, `..=expr`.
    fn parse_prefix_range_expr(&mut self, attrs: Option<AttrVec>) -> PResult<'a, P<Expr>> {
        // Check for deprecated `...` syntax.
        if self.token == token::DotDotDot {
            self.err_dotdotdot_syntax(self.token.span);
        }

        debug_assert!(
            [token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token.kind),
            "parse_prefix_range_expr: token {:?} is not DotDot/DotDotEq",
            self.token
        );

        let limits = match self.token.kind {
            token::DotDot => RangeLimits::HalfOpen,
            _ => RangeLimits::Closed,
        };
        let op = AssocOp::from_token(&self.token);
        let attrs = self.parse_or_use_outer_attributes(attrs)?;
        let lo = self.token.span;
        self.bump();
        let (span, opt_end) = if self.is_at_start_of_range_notation_rhs() {
            // RHS must be parsed with more associativity than the dots.
            self.parse_assoc_expr_with(op.unwrap().precedence() + 1, LhsExpr::NotYetParsed)
                .map(|x| (lo.to(x.span), Some(x)))?
        } else {
            (lo, None)
        };
        Ok(self.mk_expr(span, self.mk_range(None, opt_end, limits)?, attrs))
    }

    /// Parses a prefix-unary-operator expr.
    fn parse_prefix_expr(&mut self, attrs: Option<AttrVec>) -> PResult<'a, P<Expr>> {
        let attrs = self.parse_or_use_outer_attributes(attrs)?;
        self.maybe_collect_tokens(!attrs.is_empty(), |this| {
            let lo = this.token.span;
            // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr()
            let (hi, ex) = match this.token.uninterpolate().kind {
                token::Not => this.parse_unary_expr(lo, UnOp::Not), // `!expr`
                token::Tilde => this.recover_tilde_expr(lo), // `~expr`
                token::BinOp(token::Minus) => this.parse_unary_expr(lo, UnOp::Neg), // `-expr`
                token::BinOp(token::Star) => this.parse_unary_expr(lo, UnOp::Deref), // `*expr`
                token::BinOp(token::And) | token::AndAnd => this.parse_borrow_expr(lo),
                token::Ident(..) if this.token.is_keyword(kw::Box) => this.parse_box_expr(lo),
                token::Ident(..) if this.is_mistaken_not_ident_negation() => {
                    this.recover_not_expr(lo)
                }
                _ => return this.parse_dot_or_call_expr(Some(attrs)),
            }?;
            Ok(this.mk_expr(lo.to(hi), ex, attrs))
        })
    }

    /// Shared tail of the prefix-operator parsers: eats the operator token,
    /// parses the operand, and returns the combined span plus the operand.
    fn parse_prefix_expr_common(&mut self, lo: Span) -> PResult<'a, (Span, P<Expr>)> {
        self.bump();
        let expr = self.parse_prefix_expr(None);
        let (span, expr) = self.interpolated_or_expr_span(expr)?;
        Ok((lo.to(span), expr))
    }

    fn parse_unary_expr(&mut self, lo: Span, op: UnOp) -> PResult<'a, (Span, ExprKind)> {
        let (span, expr) = self.parse_prefix_expr_common(lo)?;
        Ok((span, self.mk_unary(op, expr)))
    }

    // Recover on `~expr` by suggesting `!` (bitwise negation) instead.
    fn recover_tilde_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> {
        self.struct_span_err(lo, "`~` cannot be used as a unary operator")
            .span_suggestion_short(
                lo,
                "use `!` to perform bitwise not",
                "!".to_owned(),
                Applicability::MachineApplicable,
            )
            .emit();

        // Recover by parsing the operand as if `!` had been written.
        self.parse_unary_expr(lo, UnOp::Not)
    }

    /// Parse `box expr`.
    fn parse_box_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> {
        let (span, expr) = self.parse_prefix_expr_common(lo)?;
        // `box` syntax is unstable; record the span for feature gating.
        self.sess.gated_spans.gate(sym::box_syntax, span);
        Ok((span, ExprKind::Box(expr)))
    }

    /// Detects the identifier `not` used where `!` was intended, e.g. `not x`.
    fn is_mistaken_not_ident_negation(&self) -> bool {
        let token_cannot_continue_expr = |t: &Token| match t.uninterpolate().kind {
            // These tokens can start an expression after `!`, but
            // can't continue an expression after an ident
            token::Ident(name, is_raw) => token::ident_can_begin_expr(name, t.span, is_raw),
            token::Literal(..) | token::Pound => true,
            _ => t.is_whole_expr(),
        };
        self.token.is_ident_named(sym::not) && self.look_ahead(1, token_cannot_continue_expr)
    }

    /// Recover on `not expr` in favor of `!expr`.
    fn recover_not_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> {
        // Emit the error...
        let not_token = self.look_ahead(1, |t| t.clone());
        self.struct_span_err(
            not_token.span,
            &format!("unexpected {} after identifier", super::token_descr(&not_token)),
        )
        .span_suggestion_short(
            // Span the `not` plus trailing whitespace to avoid
            // trailing whitespace after the `!` in our suggestion
            self.sess.source_map().span_until_non_whitespace(lo.to(not_token.span)),
            "use `!` to perform logical negation",
            "!".to_owned(),
            Applicability::MachineApplicable,
        )
        .emit();

        // ...and recover!
        self.parse_unary_expr(lo, UnOp::Not)
    }

    /// Returns the span of expr, if it was not interpolated or the span of the interpolated token.
    fn interpolated_or_expr_span(
        &self,
        expr: PResult<'a, P<Expr>>,
    ) -> PResult<'a, (Span, P<Expr>)> {
        expr.map(|e| {
            (
                match self.prev_token.kind {
                    TokenKind::Interpolated(..) => self.prev_token.span,
                    _ => e.span,
                },
                e,
            )
        })
    }

    /// Parses the RHS of an `as` cast (or `:` ascription via `expr_kind`),
    /// recovering from `x as usize < y`-style misparses of generic arguments.
    fn parse_assoc_op_cast(
        &mut self,
        lhs: P<Expr>,
        lhs_span: Span,
        expr_kind: fn(P<Expr>, P<Ty>) -> ExprKind,
    ) -> PResult<'a, P<Expr>> {
        let mk_expr = |this: &mut Self, rhs: P<Ty>| {
            this.mk_expr(lhs_span.to(rhs.span), expr_kind(lhs, rhs), AttrVec::new())
        };

        // Save the state of the parser before parsing type normally, in case there is a
        // LessThan comparison after this cast.
        let parser_snapshot_before_type = self.clone();
        let cast_expr = match self.parse_ty_no_plus() {
            Ok(rhs) => mk_expr(self, rhs),
            Err(mut type_err) => {
                // Rewind to before attempting to parse the type with generics, to recover
                // from situations like `x as usize < y` in which we first tried to parse
                // `usize < y` as a type with generic arguments.
                let parser_snapshot_after_type = mem::replace(self, parser_snapshot_before_type);

                match self.parse_path(PathStyle::Expr) {
                    Ok(path) => {
                        let (op_noun, op_verb) = match self.token.kind {
                            token::Lt => ("comparison", "comparing"),
                            token::BinOp(token::Shl) => ("shift", "shifting"),
                            _ => {
                                // We can end up here even without `<` being the next token, for
                                // example because `parse_ty_no_plus` returns `Err` on keywords,
                                // but `parse_path` returns `Ok` on them due to error recovery.
                                // Return original error and parser state.
                                *self = parser_snapshot_after_type;
                                return Err(type_err);
                            }
                        };

                        // Successfully parsed the type path leaving a `<` yet to parse.
                        type_err.cancel();

                        // Report non-fatal diagnostics, keep `x as usize` as an expression
                        // in AST and continue parsing.
                        let msg = format!(
                            "`<` is interpreted as a start of generic arguments for `{}`, not a {}",
                            pprust::path_to_string(&path),
                            op_noun,
                        );
                        let span_after_type = parser_snapshot_after_type.token.span;
                        let expr = mk_expr(self, self.mk_ty(path.span, TyKind::Path(None, path)));

                        let expr_str = self
                            .span_to_snippet(expr.span)
                            .unwrap_or_else(|_| pprust::expr_to_string(&expr));

                        self.struct_span_err(self.token.span, &msg)
                            .span_label(
                                self.look_ahead(1, |t| t.span).to(span_after_type),
                                "interpreted as generic arguments",
                            )
                            .span_label(self.token.span, format!("not interpreted as {}", op_noun))
                            .span_suggestion(
                                expr.span,
                                &format!("try {} the cast value", op_verb),
                                format!("({})", expr_str),
                                Applicability::MachineApplicable,
                            )
                            .emit();

                        expr
                    }
                    Err(mut path_err) => {
                        // Couldn't parse as a path, return original error and parser state.
                        path_err.cancel();
                        *self = parser_snapshot_after_type;
                        return Err(type_err);
                    }
                }
            }
        };

        self.parse_and_disallow_postfix_after_cast(cast_expr)
    }

    /// Parses a postfix operators such as `.`, `?`, or index (`[]`) after a cast,
    /// then emits an error and returns the newly parsed tree.
    /// The resulting parse tree for `&x as T[0]` has a precedence of `((&x) as T)[0]`.
    fn parse_and_disallow_postfix_after_cast(
        &mut self,
        cast_expr: P<Expr>,
    ) -> PResult<'a, P<Expr>> {
        // Save the memory location of expr before parsing any following postfix operators.
        // This will be compared with the memory location of the output expression.
        // If they differ we can assume we parsed another expression because the existing
        // expression is not reallocated.
        let addr_before = &*cast_expr as *const _ as usize;
        let span = cast_expr.span;
        let with_postfix = self.parse_dot_or_call_expr_with_(cast_expr, span)?;
        let changed = addr_before != &*with_postfix as *const _ as usize;

        // Check if an illegal postfix operator has been added after the cast.
        // If the resulting expression is not a cast, or has a different memory location, it is an
        // illegal postfix operator.
        if !matches!(with_postfix.kind, ExprKind::Cast(_, _) | ExprKind::Type(_, _)) || changed {
            let msg = format!(
                "casts cannot be followed by {}",
                match with_postfix.kind {
                    ExprKind::Index(_, _) => "indexing",
                    ExprKind::Try(_) => "?",
                    ExprKind::Field(_, _) => "a field access",
                    ExprKind::MethodCall(_, _, _) => "a method call",
                    ExprKind::Call(_, _) => "a function call",
                    ExprKind::Await(_) => "`.await`",
                    ExprKind::Err => return Ok(with_postfix),
                    _ => unreachable!("parse_dot_or_call_expr_with_ shouldn't produce this"),
                }
            );
            let mut err = self.struct_span_err(span, &msg);
            // If type ascription is "likely an error", the user will already be getting a useful
            // help message, and doesn't need a second.
            if self.last_type_ascription.map_or(false, |last_ascription| last_ascription.1) {
                self.maybe_annotate_with_ascription(&mut err, false);
            } else {
                let suggestions = vec![
                    (span.shrink_to_lo(), "(".to_string()),
                    (span.shrink_to_hi(), ")".to_string()),
                ];
                err.multipart_suggestion(
                    "try surrounding the expression in parentheses",
                    suggestions,
                    Applicability::MachineApplicable,
                );
            }
            err.emit();
        };
        Ok(with_postfix)
    }

    /// Parses the type after a `:` type-ascription operator and gates the feature.
    fn parse_assoc_op_ascribe(&mut self, lhs: P<Expr>, lhs_span: Span) -> PResult<'a, P<Expr>> {
        let maybe_path = self.could_ascription_be_path(&lhs.kind);
        self.last_type_ascription = Some((self.prev_token.span, maybe_path));
        let lhs = self.parse_assoc_op_cast(lhs, lhs_span, ExprKind::Type)?;
        self.sess.gated_spans.gate(sym::type_ascription, lhs.span);
        Ok(lhs)
    }

    /// Parse `& mut? <expr>` or `& raw [ const | mut ] <expr>`.
    fn parse_borrow_expr(&mut self, lo: Span) -> PResult<'a, (Span, ExprKind)> {
        self.expect_and()?;
        let has_lifetime = self.token.is_lifetime() && self.look_ahead(1, |t| t != &token::Colon);
        let lifetime = has_lifetime.then(|| self.expect_lifetime()); // For recovery, see below.
        let (borrow_kind, mutbl) = self.parse_borrow_modifiers(lo);
        let expr = self.parse_prefix_expr(None);
        let (hi, expr) = self.interpolated_or_expr_span(expr)?;
        let span = lo.to(hi);
        if let Some(lt) = lifetime {
            // A lifetime after `&` is not valid in an expression; report it.
            self.error_remove_borrow_lifetime(span, lt.ident.span);
        }
        Ok((span, ExprKind::AddrOf(borrow_kind, mutbl, expr)))
    }

    fn error_remove_borrow_lifetime(&self, span: Span, lt_span: Span) {
        self.struct_span_err(span, "borrow expressions cannot be annotated with lifetimes")
            .span_label(lt_span, "annotated with lifetime here")
            .span_suggestion(
                lt_span,
                "remove the lifetime annotation",
                String::new(),
                Applicability::MachineApplicable,
            )
            .emit();
    }

    /// Parse `mut?` or `raw [ const | mut ]`.
    fn parse_borrow_modifiers(&mut self, lo: Span) -> (ast::BorrowKind, ast::Mutability) {
        if self.check_keyword(kw::Raw) && self.look_ahead(1, Token::is_mutability) {
            // `raw [ const | mut ]`.
            let found_raw = self.eat_keyword(kw::Raw);
            assert!(found_raw);
            let mutability = self.parse_const_or_mut().unwrap();
            // Raw borrows are unstable; record the span for feature gating.
            self.sess.gated_spans.gate(sym::raw_ref_op, lo.to(self.prev_token.span));
            (ast::BorrowKind::Raw, mutability)
        } else {
            // `mut?`
            (ast::BorrowKind::Ref, self.parse_mutability())
        }
    }

    /// Parses `a.b` or `a(13)` or `a[4]` or just `a`.
    fn parse_dot_or_call_expr(&mut self, attrs: Option<AttrVec>) -> PResult<'a, P<Expr>> {
        let attrs = self.parse_or_use_outer_attributes(attrs)?;
        let base = self.parse_bottom_expr();
        let (span, base) = self.interpolated_or_expr_span(base)?;
        self.parse_dot_or_call_expr_with(base, span, attrs)
    }

    /// Parses the postfix suffixes (`.f`, `(…)`, `[…]`, `?`) of an
    /// already-parsed expression `e0`, attaching `attrs` to the result.
    pub(super) fn parse_dot_or_call_expr_with(
        &mut self,
        e0: P<Expr>,
        lo: Span,
        mut attrs: AttrVec,
    ) -> PResult<'a, P<Expr>> {
        // Stitch the list of outer attributes onto the return value.
        // A little bit ugly, but the best way given the current code
        // structure
        self.parse_dot_or_call_expr_with_(e0, lo).map(|expr| {
            expr.map(|mut expr| {
                attrs.extend::<Vec<_>>(expr.attrs.into());
                expr.attrs = attrs;
                expr
            })
        })
    }

    // Loops over postfix operators, folding each suffix into `e` until no
    // suffix follows (or the expression form forbids one).
    fn parse_dot_or_call_expr_with_(&mut self, mut e: P<Expr>, lo: Span) -> PResult<'a, P<Expr>> {
        loop {
            if self.eat(&token::Question) {
                // `expr?`
                e = self.mk_expr(lo.to(self.prev_token.span), ExprKind::Try(e), AttrVec::new());
                continue;
            }
            if self.eat(&token::Dot) {
                // expr.f
                e = self.parse_dot_suffix_expr(lo, e)?;
                continue;
            }
            if self.expr_is_complete(&e) {
                return Ok(e);
            }
            e = match self.token.kind {
                token::OpenDelim(token::Paren) => self.parse_fn_call_expr(lo, e),
                token::OpenDelim(token::Bracket) => self.parse_index_expr(lo, e)?,
                _ => return Ok(e),
            }
        }
    }

    // Dispatches on the token after `.`: an identifier (field access, method
    // call, or `.await`), an integer (tuple field), or a float (tuple fields
    // like `x.0.1`, lexed as a single float token that must be re-split).
    fn parse_dot_suffix_expr(&mut self, lo: Span, base: P<Expr>) -> PResult<'a, P<Expr>> {
        match self.token.uninterpolate().kind {
            token::Ident(..) => self.parse_dot_suffix(base, lo),
            token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => {
                Ok(self.parse_tuple_field_access_expr(lo, base, symbol, suffix, None))
            }
            token::Literal(token::Lit { kind: token::Float, symbol, suffix }) => {
                Ok(self.parse_tuple_field_access_expr_float(lo, base, symbol, suffix))
            }
            _ => {
                self.error_unexpected_after_dot();
                Ok(base)
            }
        }
    }

    fn error_unexpected_after_dot(&self) {
        // FIXME Could factor this out into non_fatal_unexpected or something.
        let actual = pprust::token_to_string(&self.token);
        self.struct_span_err(self.token.span, &format!("unexpected token: `{}`", actual)).emit();
    }

    // We need an identifier or integer, but the next token is a float.
    // Break the float into components to extract the identifier or integer.
    // FIXME: With current `TokenCursor` it's hard to break tokens into more than 2
    // parts unless those parts are processed immediately. `TokenCursor` should either
    // support pushing "future tokens" (would be also helpful to `break_and_eat`), or
    // we should break everything including floats into more basic proc-macro style
    // tokens in the lexer (probably preferable).
    fn parse_tuple_field_access_expr_float(
        &mut self,
        lo: Span,
        base: P<Expr>,
        float: Symbol,
        suffix: Option<Symbol>,
    ) -> P<Expr> {
        #[derive(Debug)]
        enum FloatComponent {
            IdentLike(String),
            Punct(char),
        }
        use FloatComponent::*;

        // Split the float's text into runs of ident-like characters separated
        // by the punctuation (`.`, `+`, `-`) that can occur inside a float.
        let mut components = Vec::new();
        let mut ident_like = String::new();
        for c in float.as_str().chars() {
            if c == '_' || c.is_ascii_alphanumeric() {
                ident_like.push(c);
            } else if matches!(c, '.' | '+' | '-') {
                if !ident_like.is_empty() {
                    components.push(IdentLike(mem::take(&mut ident_like)));
                }
                components.push(Punct(c));
            } else {
                panic!("unexpected character in a float token: {:?}", c)
            }
        }
        if !ident_like.is_empty() {
            components.push(IdentLike(ident_like));
        }

        // FIXME: Make the span more precise.
        let span = self.token.span;
        match &*components {
            // 1e2
            [IdentLike(i)] => {
                self.parse_tuple_field_access_expr(lo, base, Symbol::intern(&i), suffix, None)
            }
            // 1.
            [IdentLike(i), Punct('.')] => {
                assert!(suffix.is_none());
                let symbol = Symbol::intern(&i);
                self.token = Token::new(token::Ident(symbol, false), span);
                let next_token = Token::new(token::Dot, span);
                self.parse_tuple_field_access_expr(lo, base, symbol, None, Some(next_token))
            }
            // 1.2 | 1.2e3
            [IdentLike(i1), Punct('.'), IdentLike(i2)] => {
                let symbol1 = Symbol::intern(&i1);
                self.token = Token::new(token::Ident(symbol1, false), span);
                let next_token1 = Token::new(token::Dot, span);
                let base1 =
                    self.parse_tuple_field_access_expr(lo, base, symbol1, None, Some(next_token1));
                let symbol2 = Symbol::intern(&i2);
                let next_token2 = Token::new(token::Ident(symbol2, false), span);
                self.bump_with(next_token2); // `.`
                self.parse_tuple_field_access_expr(lo, base1, symbol2, suffix, None)
            }
            // 1e+ | 1e- (recovered)
            [IdentLike(_), Punct('+' | '-')] |
            // 1e+2 | 1e-2
            [IdentLike(_), Punct('+' | '-'), IdentLike(_)] |
            // 1.2e+3 | 1.2e-3
            [IdentLike(_), Punct('.'), IdentLike(_), Punct('+' | '-'), IdentLike(_)] => {
                // See the FIXME about `TokenCursor` above.
                self.error_unexpected_after_dot();
                base
            }
            _ => panic!("unexpected components in a float token: {:?}", components),
        }
    }

    // Builds `base.field` for a tuple index, optionally replacing the current
    // token with `next_token` (used by the float-splitting recovery above).
    fn parse_tuple_field_access_expr(
        &mut self,
        lo: Span,
        base: P<Expr>,
        field: Symbol,
        suffix: Option<Symbol>,
        next_token: Option<Token>,
    ) -> P<Expr> {
        match next_token {
            Some(next_token) => self.bump_with(next_token),
            None => self.bump(),
        }
        let span = self.prev_token.span;
        let field = ExprKind::Field(base, Ident::new(field, span));
        self.expect_no_suffix(span, "a tuple index", suffix);
        self.mk_expr(lo.to(span), field, AttrVec::new())
    }

    /// Parse a function call expression, `expr(...)`.
    fn parse_fn_call_expr(&mut self, lo: Span, fun: P<Expr>) -> P<Expr> {
        let seq = self.parse_paren_expr_seq().map(|args| {
            self.mk_expr(lo.to(self.prev_token.span), self.mk_call(fun, args), AttrVec::new())
        });
        self.recover_seq_parse_error(token::Paren, lo, seq)
    }

    /// Parse an indexing expression `expr[...]`.
    fn parse_index_expr(&mut self, lo: Span, base: P<Expr>) -> PResult<'a, P<Expr>> {
        self.bump(); // `[`
        let index = self.parse_expr()?;
        self.expect(&token::CloseDelim(token::Bracket))?;
        Ok(self.mk_expr(lo.to(self.prev_token.span), self.mk_index(base, index), AttrVec::new()))
    }

    /// Assuming we have just parsed `.`, continue parsing into an expression.
    fn parse_dot_suffix(&mut self, self_arg: P<Expr>, lo: Span) -> PResult<'a, P<Expr>> {
        // `.await` is only a suffix on the 2018 edition and later.
        if self.token.uninterpolated_span().rust_2018() && self.eat_keyword(kw::Await) {
            return self.mk_await_expr(self_arg, lo);
        }

        let fn_span_lo = self.token.span;
        let mut segment = self.parse_path_segment(PathStyle::Expr)?;
        self.check_trailing_angle_brackets(&segment, &[&token::OpenDelim(token::Paren)]);
        self.check_turbofish_missing_angle_brackets(&mut segment);

        if self.check(&token::OpenDelim(token::Paren)) {
            // Method call `expr.f()`
            let mut args = self.parse_paren_expr_seq()?;
            args.insert(0, self_arg);

            let fn_span = fn_span_lo.to(self.prev_token.span);
            let span = lo.to(self.prev_token.span);
            Ok(self.mk_expr(span, ExprKind::MethodCall(segment, args, fn_span), AttrVec::new()))
        } else {
            // Field access `expr.f`
            if let Some(args) = segment.args {
                self.struct_span_err(
                    args.span(),
                    "field expressions cannot have generic arguments",
                )
                .emit();
            }

            let span = lo.to(self.prev_token.span);
            Ok(self.mk_expr(span, ExprKind::Field(self_arg, segment.ident), AttrVec::new()))
        }
    }

    /// At the bottom (top?) of the precedence hierarchy,
    /// Parses things like parenthesized exprs, macros, `return`, etc.
    ///
    /// N.B., this does not parse outer attributes, and is private because it only works
    /// correctly if called from `parse_dot_or_call_expr()`.
    fn parse_bottom_expr(&mut self) -> PResult<'a, P<Expr>> {
        maybe_recover_from_interpolated_ty_qpath!(self, true);
        maybe_whole_expr!(self);

        // Outer attributes are already parsed and will be
        // added to the return value after the fact.
        //
        // Therefore, prevent sub-parser from parsing
        // attributes by giving them an empty "already-parsed" list.
        let attrs = AttrVec::new();

        // Note: when adding new syntax here, don't forget to adjust `TokenKind::can_begin_expr()`.
        let lo = self.token.span;
        if let token::Literal(_) = self.token.kind {
            // This match arm is a special-case of the `_` match arm below and
            // could be removed without changing functionality, but it's faster
            // to have it here, especially for programs with large constants.
            self.parse_lit_expr(attrs)
        } else if self.check(&token::OpenDelim(token::Paren)) {
            self.parse_tuple_parens_expr(attrs)
        } else if self.check(&token::OpenDelim(token::Brace)) {
            self.parse_block_expr(None, lo, BlockCheckMode::Default, attrs)
        } else if self.check(&token::BinOp(token::Or)) || self.check(&token::OrOr) {
            self.parse_closure_expr(attrs)
        } else if self.check(&token::OpenDelim(token::Bracket)) {
            self.parse_array_or_repeat_expr(attrs)
        } else if self.eat_lt() {
            let (qself, path) = self.parse_qpath(PathStyle::Expr)?;
            Ok(self.mk_expr(lo.to(path.span), ExprKind::Path(Some(qself), path), attrs))
        } else if self.check_path() {
            self.parse_path_start_expr(attrs)
        } else if self.check_keyword(kw::Move) || self.check_keyword(kw::Static) {
            self.parse_closure_expr(attrs)
        } else if self.eat_keyword(kw::If) {
            self.parse_if_expr(attrs)
        } else if self.check_keyword(kw::For) {
            if self.choose_generics_over_qpath(1) {
                // NOTE(Centril, eddyb): DO NOT REMOVE! Beyond providing parser recovery,
                // this is an insurance policy in case we allow qpaths in (tuple-)struct patterns.
                // When `for <Foo as Bar>::Proj in $expr $block` is wanted,
                // you can disambiguate in favor of a pattern with `(...)`.
                self.recover_quantified_closure_expr(attrs)
            } else {
                assert!(self.eat_keyword(kw::For));
                self.parse_for_expr(None, self.prev_token.span, attrs)
            }
        } else if self.eat_keyword(kw::While) {
            self.parse_while_expr(None, self.prev_token.span, attrs)
        } else if let Some(label) = self.eat_label() {
            self.parse_labeled_expr(label, attrs)
        } else if self.eat_keyword(kw::Loop) {
            self.parse_loop_expr(None, self.prev_token.span, attrs)
        } else if self.eat_keyword(kw::Continue) {
            let kind = ExprKind::Continue(self.eat_label());
            Ok(self.mk_expr(lo.to(self.prev_token.span), kind, attrs))
        } else if self.eat_keyword(kw::Match) {
            let match_sp = self.prev_token.span;
            self.parse_match_expr(attrs).map_err(|mut err| {
                err.span_label(match_sp, "while parsing this match expression");
                err
            })
        } else if self.eat_keyword(kw::Unsafe) {
            self.parse_block_expr(None, lo, BlockCheckMode::Unsafe(ast::UserProvided), attrs)
        } else if self.is_do_catch_block() {
            self.recover_do_catch(attrs)
        } else if self.is_try_block() {
            self.expect_keyword(kw::Try)?;
            self.parse_try_block(lo, attrs)
        } else if self.eat_keyword(kw::Return) {
            self.parse_return_expr(attrs)
        } else if self.eat_keyword(kw::Break) {
            self.parse_break_expr(attrs)
        } else if self.eat_keyword(kw::Yield) {
            self.parse_yield_expr(attrs)
        } else if self.eat_keyword(kw::Let) {
            self.parse_let_expr(attrs)
        } else if !self.unclosed_delims.is_empty() && self.check(&token::Semi) {
            // Don't complain about bare semicolons after unclosed braces
            // recovery in order to keep the error count down. Fixing the
            // delimiters will possibly also fix the bare semicolon found in
            // expression context. For example, silence the following error:
            //
            //     error: expected expression, found `;`
            //      --> file.rs:2:13
            //       |
            //     2 |     foo(bar(;
            //       |             ^ expected expression
            self.bump();
            Ok(self.mk_expr_err(self.token.span))
        } else if self.token.uninterpolated_span().rust_2018() {
            // `Span::rust_2018()` is somewhat expensive; don't get it repeatedly.
            if self.check_keyword(kw::Async) {
                if self.is_async_block() {
                    // Check for `async {` and `async move {`.
                    self.parse_async_block(attrs)
                } else {
                    self.parse_closure_expr(attrs)
                }
            } else if self.eat_keyword(kw::Await) {
                self.recover_incorrect_await_syntax(lo, self.prev_token.span, attrs)
            } else {
                self.parse_lit_expr(attrs)
            }
        } else {
            self.parse_lit_expr(attrs)
        }
    }

    // Runs `f` while collecting its token stream — but only when outer
    // attributes are present (the tokens are needed for e.g. `cfg_attr`
    // and proc-macro expansion); otherwise skip the collection cost.
    fn maybe_collect_tokens(
        &mut self,
        has_outer_attrs: bool,
        f: impl FnOnce(&mut Self) -> PResult<'a, P<Expr>>,
    ) -> PResult<'a, P<Expr>> {
        if has_outer_attrs {
            let (mut expr, tokens) = self.collect_tokens(f)?;
            debug!("maybe_collect_tokens: Collected tokens for {:?} (tokens {:?}", expr, tokens);
            expr.tokens = Some(tokens);
            Ok(expr)
        } else {
            f(self)
        }
    }

    // Parses a literal expression; on failure, tries to suggest a macro
    // invocation (e.g. `foo!`) as recovery.
    fn parse_lit_expr(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.token.span;
        match self.parse_opt_lit() {
            Some(literal) => {
                let expr = self.mk_expr(lo.to(self.prev_token.span), ExprKind::Lit(literal), attrs);
                self.maybe_recover_from_bad_qpath(expr, true)
            }
            None => self.try_macro_suggestion(),
        }
    }

    // Parses `(...)` as either a parenthesized expression or a tuple:
    // `(e)` is a paren expr, `(e,)` and longer lists are tuples.
    fn parse_tuple_parens_expr(&mut self, mut attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.token.span;
        self.expect(&token::OpenDelim(token::Paren))?;
        attrs.extend(self.parse_inner_attributes()?); // `(#![foo] a, b, ...)` is OK.
        let (es, trailing_comma) = match self.parse_seq_to_end(
            &token::CloseDelim(token::Paren),
            SeqSep::trailing_allowed(token::Comma),
            |p| p.parse_expr_catch_underscore(),
        ) {
            Ok(x) => x,
            Err(err) => return Ok(self.recover_seq_parse_error(token::Paren, lo, Err(err))),
        };
        let kind = if es.len() == 1 && !trailing_comma {
            // `(e)` is parenthesized `e`.
            ExprKind::Paren(es.into_iter().next().unwrap())
        } else {
            // `(e,)` is a tuple with only one field, `e`.
            ExprKind::Tup(es)
        };
        let expr = self.mk_expr(lo.to(self.prev_token.span), kind, attrs);
        self.maybe_recover_from_bad_qpath(expr, true)
    }

    // Parses `[...]` as an array literal (`[a, b]`, `[a]`, `[]`) or a
    // repeat expression (`[expr; count]`).
    fn parse_array_or_repeat_expr(&mut self, mut attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.token.span;
        self.bump(); // `[`

        attrs.extend(self.parse_inner_attributes()?);

        let close = &token::CloseDelim(token::Bracket);
        let kind = if self.eat(close) {
            // Empty vector
            ExprKind::Array(Vec::new())
        } else {
            // Non-empty vector
            let first_expr = self.parse_expr()?;
            if self.eat(&token::Semi) {
                // Repeating array syntax: `[ 0; 512 ]`
                let count = self.parse_anon_const_expr()?;
                self.expect(close)?;
                ExprKind::Repeat(first_expr, count)
            } else if self.eat(&token::Comma) {
                // Vector with two or more elements.
                let sep = SeqSep::trailing_allowed(token::Comma);
                let (remaining_exprs, _) = self.parse_seq_to_end(close, sep, |p| p.parse_expr())?;
                let mut exprs = vec![first_expr];
                exprs.extend(remaining_exprs);
                ExprKind::Array(exprs)
            } else {
                // Vector with one element
                self.expect(close)?;
                ExprKind::Array(vec![first_expr])
            }
        };
        let expr = self.mk_expr(lo.to(self.prev_token.span), kind, attrs);
        self.maybe_recover_from_bad_qpath(expr, true)
    }

    // Parses an expression that begins with a path: a plain path, a macro
    // invocation `path!(...)`, or a struct literal `path { ... }`.
    fn parse_path_start_expr(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let path = self.parse_path(PathStyle::Expr)?;
        let lo = path.span;

        // `!`, as an operator, is prefix, so we know this isn't that.
        let (hi, kind) = if self.eat(&token::Not) {
            // MACRO INVOCATION expression
            let mac = MacCall {
                path,
                args: self.parse_mac_args()?,
                prior_type_ascription: self.last_type_ascription,
            };
            (self.prev_token.span, ExprKind::MacCall(mac))
        } else if self.check(&token::OpenDelim(token::Brace)) {
            if let Some(expr) = self.maybe_parse_struct_expr(&path, &attrs) {
                return expr;
            } else {
                (path.span, ExprKind::Path(None, path))
            }
        } else {
            (path.span, ExprKind::Path(None, path))
        };

        let expr = self.mk_expr(lo.to(hi), kind, attrs);
        self.maybe_recover_from_bad_qpath(expr, true)
    }

    /// Parse `'label: $expr`. The label is already parsed.
    fn parse_labeled_expr(&mut self, label: Label, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = label.ident.span;
        let label = Some(label);
        let ate_colon = self.eat(&token::Colon);
        let expr = if self.eat_keyword(kw::While) {
            self.parse_while_expr(label, lo, attrs)
        } else if self.eat_keyword(kw::For) {
            self.parse_for_expr(label, lo, attrs)
        } else if self.eat_keyword(kw::Loop) {
            self.parse_loop_expr(label, lo, attrs)
        } else if self.check(&token::OpenDelim(token::Brace)) || self.token.is_whole_block() {
            self.parse_block_expr(label, lo, BlockCheckMode::Default, attrs)
        } else {
            let msg = "expected `while`, `for`, `loop` or `{` after a label";
            self.struct_span_err(self.token.span, msg).span_label(self.token.span, msg).emit();
            // Continue as an expression in an effort to recover on `'label: non_block_expr`.
            self.parse_expr()
        }?;

        if !ate_colon {
            self.error_labeled_expr_must_be_followed_by_colon(lo, expr.span);
        }

        Ok(expr)
    }

    // Error for `'label expr` (missing `:` after the label), with a
    // machine-applicable suggestion to insert it.
    fn error_labeled_expr_must_be_followed_by_colon(&self, lo: Span, span: Span) {
        self.struct_span_err(span, "labeled expression must be followed by `:`")
            .span_label(lo, "the label")
            .span_suggestion_short(
                lo.shrink_to_hi(),
                "add `:` after the label",
                ": ".to_string(),
                Applicability::MachineApplicable,
            )
            .note("labels are used before loops and blocks, allowing e.g., `break 'label` to them")
            .emit();
    }

    /// Recover on the syntax `do catch { ... }` suggesting `try { ... }` instead.
    fn recover_do_catch(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.token.span;

        self.bump(); // `do`
        self.bump(); // `catch`

        let span_dc = lo.to(self.prev_token.span);
        self.struct_span_err(span_dc, "found removed `do catch` syntax")
            .span_suggestion(
                span_dc,
                "replace with the new syntax",
                "try".to_string(),
                Applicability::MachineApplicable,
            )
            .note("following RFC #2388, the new non-placeholder syntax is `try`")
            .emit();

        self.parse_try_block(lo, attrs)
    }

    /// Parse an expression if the token can begin one.
    fn parse_expr_opt(&mut self) -> PResult<'a, Option<P<Expr>>> {
        Ok(if self.token.can_begin_expr() { Some(self.parse_expr()?) } else { None })
    }

    /// Parse `"return" expr?`.
    fn parse_return_expr(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.prev_token.span;
        let kind = ExprKind::Ret(self.parse_expr_opt()?);
        let expr = self.mk_expr(lo.to(self.prev_token.span), kind, attrs);
        self.maybe_recover_from_bad_qpath(expr, true)
    }

    /// Parse `"('label ":")? break expr?`.
    fn parse_break_expr(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.prev_token.span;
        let label = self.eat_label();
        // A `{` right after `break` is the loop body in contexts where struct
        // literals are forbidden (e.g. `while x { break {} }`), not a break value.
        let kind = if self.token != token::OpenDelim(token::Brace)
            || !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL)
        {
            self.parse_expr_opt()?
        } else {
            None
        };
        let expr = self.mk_expr(lo.to(self.prev_token.span), ExprKind::Break(label, kind), attrs);
        self.maybe_recover_from_bad_qpath(expr, true)
    }

    /// Parse `"yield" expr?`.
    fn parse_yield_expr(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.prev_token.span;
        let kind = ExprKind::Yield(self.parse_expr_opt()?);
        let span = lo.to(self.prev_token.span);
        // `yield` expressions are feature-gated (`generators`).
        self.sess.gated_spans.gate(sym::generators, span);
        let expr = self.mk_expr(span, kind, attrs);
        self.maybe_recover_from_bad_qpath(expr, true)
    }

    /// Returns a string literal if the next token is a string literal.
    /// In case of error returns `Some(lit)` if the next token is a literal with a wrong kind,
    /// and returns `None` if the next token is not literal at all.
    pub fn parse_str_lit(&mut self) -> Result<ast::StrLit, Option<Lit>> {
        match self.parse_opt_lit() {
            Some(lit) => match lit.kind {
                ast::LitKind::Str(symbol_unescaped, style) => Ok(ast::StrLit {
                    style,
                    symbol: lit.token.symbol,
                    suffix: lit.token.suffix,
                    span: lit.span,
                    symbol_unescaped,
                }),
                _ => Err(Some(lit)),
            },
            None => Err(None),
        }
    }

    /// Parses a literal token, erroring if the next token is not a literal.
    pub(super) fn parse_lit(&mut self) -> PResult<'a, Lit> {
        self.parse_opt_lit().ok_or_else(|| {
            let msg = format!("unexpected token: {}", super::token_descr(&self.token));
            self.struct_span_err(self.token.span, &msg)
        })
    }

    /// Matches `lit = true | false | token_lit`.
    /// Returns `None` if the next token is not a literal.
    pub(super) fn parse_opt_lit(&mut self) -> Option<Lit> {
        let mut recovered = None;
        if self.token == token::Dot {
            // Attempt to recover `.4` as `0.4`. We don't currently have any syntax where
            // dot would follow an optional literal, so we do this unconditionally.
            recovered = self.look_ahead(1, |next_token| {
                if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) =
                    next_token.kind
                {
                    // Only recover when `.` and the integer are adjacent (no space).
                    if self.token.span.hi() == next_token.span.lo() {
                        let s = String::from("0.") + &symbol.as_str();
                        let kind = TokenKind::lit(token::Float, Symbol::intern(&s), suffix);
                        return Some(Token::new(kind, self.token.span.to(next_token.span)));
                    }
                }
                None
            });
            if let Some(token) = &recovered {
                self.bump();
                self.error_float_lits_must_have_int_part(&token);
            }
        }

        let token = recovered.as_ref().unwrap_or(&self.token);
        match Lit::from_token(token) {
            Ok(lit) => {
                self.bump();
                Some(lit)
            }
            Err(LitError::NotLiteral) => None,
            Err(err) => {
                let span = token.span;
                let lit = match token.kind {
                    token::Literal(lit) => lit,
                    _ => unreachable!(),
                };
                self.bump();
                self.report_lit_error(err, lit, span);
                // Pack possible quotes and prefixes from the original literal into
                // the error literal's symbol so they can be pretty-printed faithfully.
                let suffixless_lit = token::Lit::new(lit.kind, lit.symbol, None);
                let symbol = Symbol::intern(&suffixless_lit.to_string());
                let lit = token::Lit::new(token::Err, symbol, lit.suffix);
                Some(Lit::from_lit_token(lit, span).unwrap_or_else(|_| unreachable!()))
            }
        }
    }

    // Error for `.4`-style floats, suggesting the `0.4` form.
    fn error_float_lits_must_have_int_part(&self, token: &Token) {
        self.struct_span_err(token.span, "float literals must have an integer part")
            .span_suggestion(
                token.span,
                "must have an integer part",
                pprust::token_to_string(token),
                Applicability::MachineApplicable,
            )
            .emit();
    }

    // Reports a diagnostic for each `LitError` variant produced when a
    // literal token fails to convert to an AST literal.
    fn report_lit_error(&self, err: LitError, lit: token::Lit, span: Span) {
        // Checks if `s` looks like i32 or u1234 etc.
        fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
            s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit())
        }

        let token::Lit { kind, suffix, .. } = lit;
        match err {
            // `NotLiteral` is not an error by itself, so we don't report
            // it and give the parser opportunity to try something else.
            LitError::NotLiteral => {}
            // `LexerError` *is* an error, but it was already reported
            // by lexer, so here we don't report it the second time.
            LitError::LexerError => {}
            LitError::InvalidSuffix => {
                self.expect_no_suffix(
                    span,
                    &format!("{} {} literal", kind.article(), kind.descr()),
                    suffix,
                );
            }
            LitError::InvalidIntSuffix => {
                let suf = suffix.expect("suffix error with no suffix").as_str();
                if looks_like_width_suffix(&['i', 'u'], &suf) {
                    // If it looks like a width, try to be helpful.
                    let msg = format!("invalid width `{}` for integer literal", &suf[1..]);
                    self.struct_span_err(span, &msg)
                        .help("valid widths are 8, 16, 32, 64 and 128")
                        .emit();
                } else {
                    let msg = format!("invalid suffix `{}` for integer literal", suf);
                    self.struct_span_err(span, &msg)
                        .span_label(span, format!("invalid suffix `{}`", suf))
                        .help("the suffix must be one of the integral types (`u32`, `isize`, etc)")
                        .emit();
                }
            }
            LitError::InvalidFloatSuffix => {
                let suf = suffix.expect("suffix error with no suffix").as_str();
                if looks_like_width_suffix(&['f'], &suf) {
                    // If it looks like a width, try to be helpful.
                    let msg = format!("invalid width `{}` for float literal", &suf[1..]);
                    self.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit();
                } else {
                    let msg = format!("invalid suffix `{}` for float literal", suf);
                    self.struct_span_err(span, &msg)
                        .span_label(span, format!("invalid suffix `{}`", suf))
                        .help("valid suffixes are `f32` and `f64`")
                        .emit();
                }
            }
            LitError::NonDecimalFloat(base) => {
                let descr = match base {
                    16 => "hexadecimal",
                    8 => "octal",
                    2 => "binary",
                    _ => unreachable!(),
                };
                self.struct_span_err(span, &format!("{} float literal is not supported", descr))
                    .span_label(span, "not supported")
                    .emit();
            }
            LitError::IntTooLarge => {
                self.struct_span_err(span, "integer literal is too large").emit();
            }
        }
    }

    // Errors on an unexpected literal suffix. For tuple indices, certain
    // suffixes are only warned about (see #59553) to let the ecosystem migrate.
    pub(super) fn expect_no_suffix(&self, sp: Span, kind: &str, suffix: Option<Symbol>) {
        if let Some(suf) = suffix {
            let mut err = if kind == "a tuple index"
                && [sym::i32, sym::u32, sym::isize, sym::usize].contains(&suf)
            {
                // #59553: warn instead of reject out of hand to allow the fix to percolate
                // through the ecosystem when people fix their macros
                let mut err = self
                    .sess
                    .span_diagnostic
                    .struct_span_warn(sp, &format!("suffixes on {} are invalid", kind));
                err.note(&format!(
                    "`{}` is *temporarily* accepted on tuple index fields as it was \
                        incorrectly accepted on stable for a few releases",
                    suf,
                ));
                err.help(
                    "on proc macros, you'll want to use `syn::Index::from` or \
                        `proc_macro::Literal::*_unsuffixed` for code that will desugar \
                        to tuple field access",
                );
                err.note(
                    "see issue #60210 <https://github.com/rust-lang/rust/issues/60210> \
                        for more information",
                );
                err
            } else {
                self.struct_span_err(sp, &format!("suffixes on {} are invalid", kind))
            };
            err.span_label(sp, format!("invalid suffix `{}`", suf));
            err.emit();
        }
    }

    /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`).
    /// Keep this in sync with `Token::can_begin_literal_maybe_minus`.
    pub(super) fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P<Expr>> {
        maybe_whole_expr!(self);

        let lo = self.token.span;
        let minus_present = self.eat(&token::BinOp(token::Minus));
        let lit = self.parse_lit()?;
        let expr = self.mk_expr(lit.span, ExprKind::Lit(lit), AttrVec::new());

        if minus_present {
            Ok(self.mk_expr(
                lo.to(self.prev_token.span),
                self.mk_unary(UnOp::Neg, expr),
                AttrVec::new(),
            ))
        } else {
            Ok(expr)
        }
    }

    /// Parses a block or unsafe block.
    pub(super) fn parse_block_expr(
        &mut self,
        opt_label: Option<Label>,
        lo: Span,
        blk_mode: BlockCheckMode,
        mut attrs: AttrVec,
    ) -> PResult<'a, P<Expr>> {
        if let Some(label) = opt_label {
            // `'label: { ... }` blocks are feature-gated (`label_break_value`).
            self.sess.gated_spans.gate(sym::label_break_value, label.ident.span);
        }

        if self.token.is_whole_block() {
            self.struct_span_err(self.token.span, "cannot use a `block` macro fragment here")
                .span_label(lo.to(self.token.span), "the `block` fragment is within this context")
                .emit();
        }

        let (inner_attrs, blk) = self.parse_block_common(lo, blk_mode)?;
        attrs.extend(inner_attrs);
        Ok(self.mk_expr(blk.span, ExprKind::Block(blk, opt_label), attrs))
    }

    /// Recover on an explicitly quantified closure expression, e.g., `for<'a> |x: &'a u8| *x + 1`.
    fn recover_quantified_closure_expr(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.token.span;
        // Consume the bogus `for<...>` binder, then parse the closure itself.
        let _ = self.parse_late_bound_lifetime_defs()?;
        let span_for = lo.to(self.prev_token.span);
        let closure = self.parse_closure_expr(attrs)?;

        self.struct_span_err(span_for, "cannot introduce explicit parameters for a closure")
            .span_label(closure.span, "the parameters are attached to this closure")
            .span_suggestion(
                span_for,
                "remove the parameters",
                String::new(),
                Applicability::MachineApplicable,
            )
            .emit();

        Ok(self.mk_expr_err(lo.to(closure.span)))
    }

    /// Parses a closure expression (e.g., `move |args| expr`).
    fn parse_closure_expr(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.token.span;

        let movability =
            if self.eat_keyword(kw::Static) { Movability::Static } else { Movability::Movable };

        // `async` closures exist only on the 2018 edition and later.
        let asyncness = if self.token.uninterpolated_span().rust_2018() {
            self.parse_asyncness()
        } else {
            Async::No
        };
        if let Async::Yes { span, .. } = asyncness {
            // Feature-gate `async ||` closures.
            self.sess.gated_spans.gate(sym::async_closure, span);
        }

        let capture_clause = self.parse_capture_clause();
        let decl = self.parse_fn_block_decl()?;
        let decl_hi = self.prev_token.span;
        let body = match decl.output {
            FnRetTy::Default(_) => {
                let restrictions = self.restrictions - Restrictions::STMT_EXPR;
                self.parse_expr_res(restrictions, None)?
            }
            _ => {
                // If an explicit return type is given, require a block to appear (RFC 968).
                let body_lo = self.token.span;
                self.parse_block_expr(None, body_lo, BlockCheckMode::Default, AttrVec::new())?
            }
        };

        Ok(self.mk_expr(
            lo.to(body.span),
            ExprKind::Closure(capture_clause, asyncness, movability, decl, body, lo.to(decl_hi)),
            attrs,
        ))
    }

    /// Parses an optional `move` prefix to a closure-like construct.
    fn parse_capture_clause(&mut self) -> CaptureBy {
        if self.eat_keyword(kw::Move) { CaptureBy::Value } else { CaptureBy::Ref }
    }

    /// Parses the `|arg, arg|` header of a closure.
    fn parse_fn_block_decl(&mut self) -> PResult<'a, P<FnDecl>> {
        // `||` is a single token, so an empty parameter list needs special handling.
        let inputs = if self.eat(&token::OrOr) {
            Vec::new()
        } else {
            self.expect(&token::BinOp(token::Or))?;
            let args = self
                .parse_seq_to_before_tokens(
                    &[&token::BinOp(token::Or), &token::OrOr],
                    SeqSep::trailing_allowed(token::Comma),
                    TokenExpectType::NoExpect,
                    |p| p.parse_fn_block_param(),
                )?
                .0;
            self.expect_or()?;
            args
        };
        let output = self.parse_ret_ty(AllowPlus::Yes, RecoverQPath::Yes)?;

        Ok(P(FnDecl { inputs, output }))
    }

    /// Parses a parameter in a closure header (e.g., `|arg, arg|`).
    fn parse_fn_block_param(&mut self) -> PResult<'a, Param> {
        let lo = self.token.span;
        let attrs = self.parse_outer_attributes()?;
        let pat = self.parse_pat(PARAM_EXPECTED)?;
        // Closure parameter types are optional; default to `_` (inferred).
        let ty = if self.eat(&token::Colon) {
            self.parse_ty()?
        } else {
            self.mk_ty(self.prev_token.span, TyKind::Infer)
        };
        Ok(Param {
            attrs: attrs.into(),
            ty,
            pat,
            span: lo.to(self.token.span),
            id: DUMMY_NODE_ID,
            is_placeholder: false,
        })
    }

    /// Parses an `if` expression (`if` token already eaten).
    fn parse_if_expr(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.prev_token.span;
        let cond = self.parse_cond_expr()?;

        // Verify that the parsed `if` condition makes sense as a condition. If it is a block, then
        // verify that the last statement is either an implicit return (no `;`) or an explicit
        // return. This won't catch blocks with an explicit `return`, but that would be caught by
        // the dead code lint.
        let thn = if self.eat_keyword(kw::Else) || !cond.returns() {
            self.error_missing_if_cond(lo, cond.span)
        } else {
            let attrs = self.parse_outer_attributes()?; // For recovery.
            let not_block = self.token != token::OpenDelim(token::Brace);
            let block = self.parse_block().map_err(|mut err| {
                if not_block {
                    err.span_label(lo, "this `if` expression has a condition, but no block");
                    if let ExprKind::Binary(_, _, ref right) = cond.kind {
                        if let ExprKind::Block(_, _) = right.kind {
                            err.help("maybe you forgot the right operand of the condition?");
                        }
                    }
                }
                err
            })?;
            self.error_on_if_block_attrs(lo, false, block.span, &attrs);
            block
        };
        let els = if self.eat_keyword(kw::Else) { Some(self.parse_else_expr()?) } else { None };
        Ok(self.mk_expr(lo.to(self.prev_token.span), ExprKind::If(cond, thn, els), attrs))
    }

    // Error for an `if` with no condition (e.g. `if { ... }` parsed the block
    // as the condition); returns an error block as the then-branch.
    fn error_missing_if_cond(&self, lo: Span, span: Span) -> P<ast::Block> {
        let sp = self.sess.source_map().next_point(lo);
        self.struct_span_err(sp, "missing condition for `if` expression")
            .span_label(sp, "expected if condition here")
            .emit();
        self.mk_block_err(span)
    }

    /// Parses the condition of an `if` or `while` expression.
    fn parse_cond_expr(&mut self) -> PResult<'a, P<Expr>> {
        let cond = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, None)?;

        if let ExprKind::Let(..) = cond.kind {
            // Remove the last feature gating of a `let` expression since it's stable.
            self.sess.gated_spans.ungate_last(sym::let_chains, cond.span);
        }

        Ok(cond)
    }

    /// Parses a `let $pat = $expr` pseudo-expression.
    /// The `let` token has already been eaten.
    fn parse_let_expr(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> {
        let lo = self.prev_token.span;
        let pat = self.parse_top_pat(GateOr::No)?;
        self.expect(&token::Eq)?;
        let expr = self.with_res(Restrictions::NO_STRUCT_LITERAL, |this| {
            this.parse_assoc_expr_with(1 + prec_let_scrutinee_needs_par(), None.into())
        })?;
        let span = lo.to(expr.span);
        // `let` expressions outside `if`/`while` conditions are gated
        // (`let_chains`); `parse_cond_expr` ungates the condition case.
        self.sess.gated_spans.gate(sym::let_chains, span);
        Ok(self.mk_expr(span, ExprKind::Let(pat, expr), attrs))
    }

    /// Parses an `else { ... }` expression (`else` token already eaten).
    fn parse_else_expr(&mut self) -> PResult<'a, P<Expr>> {
        let ctx_span = self.prev_token.span; // `else`
        let attrs = self.parse_outer_attributes()?; // For recovery.
        let expr = if self.eat_keyword(kw::If) {
            self.parse_if_expr(AttrVec::new())?
        } else {
            let blk = self.parse_block()?;
            self.mk_expr(blk.span, ExprKind::Block(blk, None), AttrVec::new())
        };
        self.error_on_if_block_attrs(ctx_span, true, expr.span, &attrs);
        Ok(expr)
    }

    // Errors on outer attributes placed on an `if`/`else` branch (they are
    // not allowed there), with a machine-applicable removal suggestion.
    fn error_on_if_block_attrs(
        &self,
        ctx_span: Span,
        is_ctx_else: bool,
        branch_span: Span,
        attrs: &[ast::Attribute],
    ) {
        let (span, last) = match attrs {
            [] => return,
            [x0 @ xn] | [x0, .., xn] => (x0.span.to(xn.span), xn.span),
        };
        let ctx = if is_ctx_else { "else" } else { "if" };
        self.struct_span_err(last, "outer attributes are not allowed on `if` and `else` branches")
            .span_label(branch_span, "the attributes are attached to this branch")
            .span_label(ctx_span, format!("the branch belongs to this `{}`", ctx))
            .span_suggestion(
                span,
                "remove the attributes",
                String::new(),
                Applicability::MachineApplicable,
            )
            .emit();
    }

    /// Parses `for <src_pat> in <src_expr> <src_loop_block>` (`for` token already eaten).
    fn parse_for_expr(
        &mut self,
        opt_label: Option<Label>,
        lo: Span,
        mut attrs: AttrVec,
    ) -> PResult<'a, P<Expr>> {
        // Record whether we are about to parse `for (`.
        // This is used below for recovery in case of `for ( $stuff ) $block`
        // in which case we will suggest `for $stuff $block`.
        let begin_paren = match self.token.kind {
            token::OpenDelim(token::Paren) => Some(self.token.span),
            _ => None,
        };

        let pat = self.parse_top_pat(GateOr::Yes)?;
        if !self.eat_keyword(kw::In) {
            self.error_missing_in_for_loop();
        }
        self.check_for_for_in_in_typo(self.prev_token.span);
        let expr = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, None)?;

        let pat = self.recover_parens_around_for_head(pat, &expr, begin_paren);

        let (iattrs, loop_block) = self.parse_inner_attrs_and_block()?;
        attrs.extend(iattrs);

        let kind = ExprKind::ForLoop(pat, expr, loop_block, opt_label);
        Ok(self.mk_expr(lo.to(self.prev_token.span), kind, attrs))
    }

    // Error for a `for` loop missing its `in`, including the JS-style
    // `for x of y` misspelling (#75311).
    fn error_missing_in_for_loop(&mut self) {
        let (span, msg, sugg) = if self.token.is_ident_named(sym::of) {
            // Possibly using JS syntax (#75311).
            let span = self.token.span;
            self.bump();
            (span, "try using `in` here instead", "in")
        } else {
            (self.prev_token.span.between(self.token.span), "try adding `in` here", " in ")
        };
        self.struct_span_err(span, "missing `in` in `for` loop")
            .span_suggestion_short(
                span,
                msg,
                sugg.into(),
                // Has been misleading, at least in the past (closed Issue #48492).
                Applicability::MaybeIncorrect,
            )
            .emit();
    }

    /// Parses a `while` or `while let` expression (`while` token already eaten).
    fn parse_while_expr(
        &mut self,
        opt_label: Option<Label>,
        lo: Span,
        mut attrs: AttrVec,
    ) -> PResult<'a, P<Expr>> {
        let cond = self.parse_cond_expr()?;
        let (iattrs, body) = self.parse_inner_attrs_and_block()?;
        attrs.extend(iattrs);
        Ok(self.mk_expr(lo.to(self.prev_token.span), ExprKind::While(cond, body, opt_label), attrs))
    }

    /// Parses `loop { ... }` (`loop` token already eaten).
+ fn parse_loop_expr( + &mut self, + opt_label: Option<Label>, + lo: Span, + mut attrs: AttrVec, + ) -> PResult<'a, P<Expr>> { + let (iattrs, body) = self.parse_inner_attrs_and_block()?; + attrs.extend(iattrs); + Ok(self.mk_expr(lo.to(self.prev_token.span), ExprKind::Loop(body, opt_label), attrs)) + } + + fn eat_label(&mut self) -> Option<Label> { + self.token.lifetime().map(|ident| { + self.bump(); + Label { ident } + }) + } + + /// Parses a `match ... { ... }` expression (`match` token already eaten). + fn parse_match_expr(&mut self, mut attrs: AttrVec) -> PResult<'a, P<Expr>> { + let match_span = self.prev_token.span; + let lo = self.prev_token.span; + let scrutinee = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, None)?; + if let Err(mut e) = self.expect(&token::OpenDelim(token::Brace)) { + if self.token == token::Semi { + e.span_suggestion_short( + match_span, + "try removing this `match`", + String::new(), + Applicability::MaybeIncorrect, // speculative + ); + } + return Err(e); + } + attrs.extend(self.parse_inner_attributes()?); + + let mut arms: Vec<Arm> = Vec::new(); + while self.token != token::CloseDelim(token::Brace) { + match self.parse_arm() { + Ok(arm) => arms.push(arm), + Err(mut e) => { + // Recover by skipping to the end of the block. + e.emit(); + self.recover_stmt(); + let span = lo.to(self.token.span); + if self.token == token::CloseDelim(token::Brace) { + self.bump(); + } + return Ok(self.mk_expr(span, ExprKind::Match(scrutinee, arms), attrs)); + } + } + } + let hi = self.token.span; + self.bump(); + Ok(self.mk_expr(lo.to(hi), ExprKind::Match(scrutinee, arms), attrs)) + } + + pub(super) fn parse_arm(&mut self) -> PResult<'a, Arm> { + let attrs = self.parse_outer_attributes()?; + let lo = self.token.span; + let pat = self.parse_top_pat(GateOr::No)?; + let guard = if self.eat_keyword(kw::If) { + let if_span = self.prev_token.span; + let cond = self.parse_expr()?; + if let ExprKind::Let(..) 
= cond.kind { + // Remove the last feature gating of a `let` expression since it's stable. + self.sess.gated_spans.ungate_last(sym::let_chains, cond.span); + let span = if_span.to(cond.span); + self.sess.gated_spans.gate(sym::if_let_guard, span); + } + Some(cond) + } else { + None + }; + let arrow_span = self.token.span; + self.expect(&token::FatArrow)?; + let arm_start_span = self.token.span; + + let expr = self.parse_expr_res(Restrictions::STMT_EXPR, None).map_err(|mut err| { + err.span_label(arrow_span, "while parsing the `match` arm starting here"); + err + })?; + + let require_comma = classify::expr_requires_semi_to_be_stmt(&expr) + && self.token != token::CloseDelim(token::Brace); + + let hi = self.prev_token.span; + + if require_comma { + let sm = self.sess.source_map(); + self.expect_one_of(&[token::Comma], &[token::CloseDelim(token::Brace)]).map_err( + |mut err| { + match (sm.span_to_lines(expr.span), sm.span_to_lines(arm_start_span)) { + (Ok(ref expr_lines), Ok(ref arm_start_lines)) + if arm_start_lines.lines[0].end_col == expr_lines.lines[0].end_col + && expr_lines.lines.len() == 2 + && self.token == token::FatArrow => + { + // We check whether there's any trailing code in the parse span, + // if there isn't, we very likely have the following: + // + // X | &Y => "y" + // | -- - missing comma + // | | + // | arrow_span + // X | &X => "x" + // | - ^^ self.token.span + // | | + // | parsed until here as `"y" & X` + err.span_suggestion_short( + arm_start_span.shrink_to_hi(), + "missing a comma here to end this `match` arm", + ",".to_owned(), + Applicability::MachineApplicable, + ); + } + _ => { + err.span_label( + arrow_span, + "while parsing the `match` arm starting here", + ); + } + } + err + }, + )?; + } else { + self.eat(&token::Comma); + } + + Ok(ast::Arm { + attrs, + pat, + guard, + body: expr, + span: lo.to(hi), + id: DUMMY_NODE_ID, + is_placeholder: false, + }) + } + + /// Parses a `try {...}` expression (`try` token already eaten). 
+ fn parse_try_block(&mut self, span_lo: Span, mut attrs: AttrVec) -> PResult<'a, P<Expr>> { + let (iattrs, body) = self.parse_inner_attrs_and_block()?; + attrs.extend(iattrs); + if self.eat_keyword(kw::Catch) { + let mut error = self.struct_span_err( + self.prev_token.span, + "keyword `catch` cannot follow a `try` block", + ); + error.help("try using `match` on the result of the `try` block instead"); + error.emit(); + Err(error) + } else { + let span = span_lo.to(body.span); + self.sess.gated_spans.gate(sym::try_blocks, span); + Ok(self.mk_expr(span, ExprKind::TryBlock(body), attrs)) + } + } + + fn is_do_catch_block(&self) -> bool { + self.token.is_keyword(kw::Do) + && self.is_keyword_ahead(1, &[kw::Catch]) + && self.look_ahead(2, |t| *t == token::OpenDelim(token::Brace)) + && !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL) + } + + fn is_try_block(&self) -> bool { + self.token.is_keyword(kw::Try) + && self.look_ahead(1, |t| *t == token::OpenDelim(token::Brace)) + && self.token.uninterpolated_span().rust_2018() + } + + /// Parses an `async move? {...}` expression. + fn parse_async_block(&mut self, mut attrs: AttrVec) -> PResult<'a, P<Expr>> { + let lo = self.token.span; + self.expect_keyword(kw::Async)?; + let capture_clause = self.parse_capture_clause(); + let (iattrs, body) = self.parse_inner_attrs_and_block()?; + attrs.extend(iattrs); + let kind = ExprKind::Async(capture_clause, DUMMY_NODE_ID, body); + Ok(self.mk_expr(lo.to(self.prev_token.span), kind, attrs)) + } + + fn is_async_block(&self) -> bool { + self.token.is_keyword(kw::Async) + && (( + // `async move {` + self.is_keyword_ahead(1, &[kw::Move]) + && self.look_ahead(2, |t| *t == token::OpenDelim(token::Brace)) + ) || ( + // `async {` + self.look_ahead(1, |t| *t == token::OpenDelim(token::Brace)) + )) + } + + fn is_certainly_not_a_block(&self) -> bool { + self.look_ahead(1, |t| t.is_ident()) + && ( + // `{ ident, ` cannot start a block. 
+ self.look_ahead(2, |t| t == &token::Comma) + || self.look_ahead(2, |t| t == &token::Colon) + && ( + // `{ ident: token, ` cannot start a block. + self.look_ahead(4, |t| t == &token::Comma) || + // `{ ident: ` cannot start a block unless it's a type ascription `ident: Type`. + self.look_ahead(3, |t| !t.can_begin_type()) + ) + ) + } + + fn maybe_parse_struct_expr( + &mut self, + path: &ast::Path, + attrs: &AttrVec, + ) -> Option<PResult<'a, P<Expr>>> { + let struct_allowed = !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL); + if struct_allowed || self.is_certainly_not_a_block() { + // This is a struct literal, but we don't can't accept them here. + let expr = self.parse_struct_expr(path.clone(), attrs.clone()); + if let (Ok(expr), false) = (&expr, struct_allowed) { + self.error_struct_lit_not_allowed_here(path.span, expr.span); + } + return Some(expr); + } + None + } + + fn error_struct_lit_not_allowed_here(&self, lo: Span, sp: Span) { + self.struct_span_err(sp, "struct literals are not allowed here") + .multipart_suggestion( + "surround the struct literal with parentheses", + vec![(lo.shrink_to_lo(), "(".to_string()), (sp.shrink_to_hi(), ")".to_string())], + Applicability::MachineApplicable, + ) + .emit(); + } + + pub(super) fn parse_struct_expr( + &mut self, + pth: ast::Path, + mut attrs: AttrVec, + ) -> PResult<'a, P<Expr>> { + self.bump(); + let mut fields = Vec::new(); + let mut base = None; + let mut recover_async = false; + + attrs.extend(self.parse_inner_attributes()?); + + let mut async_block_err = |e: &mut DiagnosticBuilder<'_>, span: Span| { + recover_async = true; + e.span_label(span, "`async` blocks are only allowed in the 2018 edition"); + e.help("set `edition = \"2018\"` in `Cargo.toml`"); + e.note("for more on editions, read https://doc.rust-lang.org/edition-guide"); + }; + + while self.token != token::CloseDelim(token::Brace) { + if self.eat(&token::DotDot) { + let exp_span = self.prev_token.span; + match self.parse_expr() { + Ok(e) => 
base = Some(e), + Err(mut e) => { + e.emit(); + self.recover_stmt(); + } + } + self.recover_struct_comma_after_dotdot(exp_span); + break; + } + + let recovery_field = self.find_struct_error_after_field_looking_code(); + let parsed_field = match self.parse_field() { + Ok(f) => Some(f), + Err(mut e) => { + if pth == kw::Async { + async_block_err(&mut e, pth.span); + } else { + e.span_label(pth.span, "while parsing this struct"); + } + e.emit(); + + // If the next token is a comma, then try to parse + // what comes next as additional fields, rather than + // bailing out until next `}`. + if self.token != token::Comma { + self.recover_stmt_(SemiColonMode::Comma, BlockMode::Ignore); + if self.token != token::Comma { + break; + } + } + None + } + }; + + match self.expect_one_of(&[token::Comma], &[token::CloseDelim(token::Brace)]) { + Ok(_) => { + if let Some(f) = parsed_field.or(recovery_field) { + // Only include the field if there's no parse error for the field name. + fields.push(f); + } + } + Err(mut e) => { + if pth == kw::Async { + async_block_err(&mut e, pth.span); + } else { + e.span_label(pth.span, "while parsing this struct"); + if let Some(f) = recovery_field { + fields.push(f); + e.span_suggestion( + self.prev_token.span.shrink_to_hi(), + "try adding a comma", + ",".into(), + Applicability::MachineApplicable, + ); + } + } + e.emit(); + self.recover_stmt_(SemiColonMode::Comma, BlockMode::Ignore); + self.eat(&token::Comma); + } + } + } + + let span = pth.span.to(self.token.span); + self.expect(&token::CloseDelim(token::Brace))?; + let expr = if recover_async { ExprKind::Err } else { ExprKind::Struct(pth, fields, base) }; + Ok(self.mk_expr(span, expr, attrs)) + } + + /// Use in case of error after field-looking code: `S { foo: () with a }`. 
+ fn find_struct_error_after_field_looking_code(&self) -> Option<Field> { + match self.token.ident() { + Some((ident, is_raw)) + if (is_raw || !ident.is_reserved()) + && self.look_ahead(1, |t| *t == token::Colon) => + { + Some(ast::Field { + ident, + span: self.token.span, + expr: self.mk_expr_err(self.token.span), + is_shorthand: false, + attrs: AttrVec::new(), + id: DUMMY_NODE_ID, + is_placeholder: false, + }) + } + _ => None, + } + } + + fn recover_struct_comma_after_dotdot(&mut self, span: Span) { + if self.token != token::Comma { + return; + } + self.struct_span_err( + span.to(self.prev_token.span), + "cannot use a comma after the base struct", + ) + .span_suggestion_short( + self.token.span, + "remove this comma", + String::new(), + Applicability::MachineApplicable, + ) + .note("the base struct must always be the last field") + .emit(); + self.recover_stmt(); + } + + /// Parses `ident (COLON expr)?`. + fn parse_field(&mut self) -> PResult<'a, Field> { + let attrs = self.parse_outer_attributes()?.into(); + let lo = self.token.span; + + // Check if a colon exists one ahead. This means we're parsing a fieldname. + let is_shorthand = !self.look_ahead(1, |t| t == &token::Colon || t == &token::Eq); + let (ident, expr) = if is_shorthand { + // Mimic `x: x` for the `x` field shorthand. + let ident = self.parse_ident_common(false)?; + let path = ast::Path::from_ident(ident); + (ident, self.mk_expr(ident.span, ExprKind::Path(None, path), AttrVec::new())) + } else { + let ident = self.parse_field_name()?; + self.error_on_eq_field_init(ident); + self.bump(); // `:` + (ident, self.parse_expr()?) + }; + Ok(ast::Field { + ident, + span: lo.to(expr.span), + expr, + is_shorthand, + attrs, + id: DUMMY_NODE_ID, + is_placeholder: false, + }) + } + + /// Check for `=`. This means the source incorrectly attempts to + /// initialize a field with an eq rather than a colon. 
+ fn error_on_eq_field_init(&self, field_name: Ident) { + if self.token != token::Eq { + return; + } + + self.struct_span_err(self.token.span, "expected `:`, found `=`") + .span_suggestion( + field_name.span.shrink_to_hi().to(self.token.span), + "replace equals symbol with a colon", + ":".to_string(), + Applicability::MachineApplicable, + ) + .emit(); + } + + fn err_dotdotdot_syntax(&self, span: Span) { + self.struct_span_err(span, "unexpected token: `...`") + .span_suggestion( + span, + "use `..` for an exclusive range", + "..".to_owned(), + Applicability::MaybeIncorrect, + ) + .span_suggestion( + span, + "or `..=` for an inclusive range", + "..=".to_owned(), + Applicability::MaybeIncorrect, + ) + .emit(); + } + + fn err_larrow_operator(&self, span: Span) { + self.struct_span_err(span, "unexpected token: `<-`") + .span_suggestion( + span, + "if you meant to write a comparison against a negative value, add a \ + space in between `<` and `-`", + "< -".to_string(), + Applicability::MaybeIncorrect, + ) + .emit(); + } + + fn mk_assign_op(&self, binop: BinOp, lhs: P<Expr>, rhs: P<Expr>) -> ExprKind { + ExprKind::AssignOp(binop, lhs, rhs) + } + + fn mk_range( + &self, + start: Option<P<Expr>>, + end: Option<P<Expr>>, + limits: RangeLimits, + ) -> PResult<'a, ExprKind> { + if end.is_none() && limits == RangeLimits::Closed { + self.error_inclusive_range_with_no_end(self.prev_token.span); + Ok(ExprKind::Err) + } else { + Ok(ExprKind::Range(start, end, limits)) + } + } + + fn mk_unary(&self, unop: UnOp, expr: P<Expr>) -> ExprKind { + ExprKind::Unary(unop, expr) + } + + fn mk_binary(&self, binop: BinOp, lhs: P<Expr>, rhs: P<Expr>) -> ExprKind { + ExprKind::Binary(binop, lhs, rhs) + } + + fn mk_index(&self, expr: P<Expr>, idx: P<Expr>) -> ExprKind { + ExprKind::Index(expr, idx) + } + + fn mk_call(&self, f: P<Expr>, args: Vec<P<Expr>>) -> ExprKind { + ExprKind::Call(f, args) + } + + fn mk_await_expr(&mut self, self_arg: P<Expr>, lo: Span) -> PResult<'a, P<Expr>> { + let span = 
lo.to(self.prev_token.span); + let await_expr = self.mk_expr(span, ExprKind::Await(self_arg), AttrVec::new()); + self.recover_from_await_method_call(); + Ok(await_expr) + } + + crate fn mk_expr(&self, span: Span, kind: ExprKind, attrs: AttrVec) -> P<Expr> { + P(Expr { kind, span, attrs, id: DUMMY_NODE_ID, tokens: None }) + } + + pub(super) fn mk_expr_err(&self, span: Span) -> P<Expr> { + self.mk_expr(span, ExprKind::Err, AttrVec::new()) + } +} diff --git a/compiler/rustc_parse/src/parser/generics.rs b/compiler/rustc_parse/src/parser/generics.rs new file mode 100644 index 00000000000..dd99a7587dd --- /dev/null +++ b/compiler/rustc_parse/src/parser/generics.rs @@ -0,0 +1,292 @@ +use super::Parser; + +use rustc_ast::token; +use rustc_ast::{ + self as ast, Attribute, GenericBounds, GenericParam, GenericParamKind, WhereClause, +}; +use rustc_errors::PResult; +use rustc_span::symbol::{kw, sym}; + +impl<'a> Parser<'a> { + /// Parses bounds of a lifetime parameter `BOUND + BOUND + BOUND`, possibly with trailing `+`. + /// + /// ```text + /// BOUND = LT_BOUND (e.g., `'a`) + /// ``` + fn parse_lt_param_bounds(&mut self) -> GenericBounds { + let mut lifetimes = Vec::new(); + while self.check_lifetime() { + lifetimes.push(ast::GenericBound::Outlives(self.expect_lifetime())); + + if !self.eat_plus() { + break; + } + } + lifetimes + } + + /// Matches `typaram = IDENT (`?` unbound)? optbounds ( EQ ty )?`. + fn parse_ty_param(&mut self, preceding_attrs: Vec<Attribute>) -> PResult<'a, GenericParam> { + let ident = self.parse_ident()?; + + // Parse optional colon and param bounds. + let bounds = if self.eat(&token::Colon) { + self.parse_generic_bounds(Some(self.prev_token.span))? + } else { + Vec::new() + }; + + let default = if self.eat(&token::Eq) { Some(self.parse_ty()?) 
} else { None }; + + Ok(GenericParam { + ident, + id: ast::DUMMY_NODE_ID, + attrs: preceding_attrs.into(), + bounds, + kind: GenericParamKind::Type { default }, + is_placeholder: false, + }) + } + + fn parse_const_param(&mut self, preceding_attrs: Vec<Attribute>) -> PResult<'a, GenericParam> { + let const_span = self.token.span; + + self.expect_keyword(kw::Const)?; + let ident = self.parse_ident()?; + self.expect(&token::Colon)?; + let ty = self.parse_ty()?; + + self.sess.gated_spans.gate(sym::min_const_generics, const_span.to(self.prev_token.span)); + + Ok(GenericParam { + ident, + id: ast::DUMMY_NODE_ID, + attrs: preceding_attrs.into(), + bounds: Vec::new(), + kind: GenericParamKind::Const { ty, kw_span: const_span }, + is_placeholder: false, + }) + } + + /// Parses a (possibly empty) list of lifetime and type parameters, possibly including + /// a trailing comma and erroneous trailing attributes. + pub(super) fn parse_generic_params(&mut self) -> PResult<'a, Vec<ast::GenericParam>> { + let mut params = Vec::new(); + loop { + let attrs = self.parse_outer_attributes()?; + if self.check_lifetime() { + let lifetime = self.expect_lifetime(); + // Parse lifetime parameter. + let bounds = + if self.eat(&token::Colon) { self.parse_lt_param_bounds() } else { Vec::new() }; + params.push(ast::GenericParam { + ident: lifetime.ident, + id: lifetime.id, + attrs: attrs.into(), + bounds, + kind: ast::GenericParamKind::Lifetime, + is_placeholder: false, + }); + } else if self.check_keyword(kw::Const) { + // Parse const parameter. + params.push(self.parse_const_param(attrs)?); + } else if self.check_ident() { + // Parse type parameter. + params.push(self.parse_ty_param(attrs)?); + } else if self.token.can_begin_type() { + // Trying to write an associated type bound? 
(#26271) + let snapshot = self.clone(); + match self.parse_ty_where_predicate() { + Ok(where_predicate) => { + self.struct_span_err( + where_predicate.span(), + "bounds on associated types do not belong here", + ) + .span_label(where_predicate.span(), "belongs in `where` clause") + .emit(); + } + Err(mut err) => { + err.cancel(); + *self = snapshot; + break; + } + } + } else { + // Check for trailing attributes and stop parsing. + if !attrs.is_empty() { + if !params.is_empty() { + self.struct_span_err( + attrs[0].span, + "trailing attribute after generic parameter", + ) + .span_label(attrs[0].span, "attributes must go before parameters") + .emit(); + } else { + self.struct_span_err(attrs[0].span, "attribute without generic parameters") + .span_label( + attrs[0].span, + "attributes are only permitted when preceding parameters", + ) + .emit(); + } + } + break; + } + + if !self.eat(&token::Comma) { + break; + } + } + Ok(params) + } + + /// Parses a set of optional generic type parameter declarations. Where + /// clauses are not parsed here, and must be added later via + /// `parse_where_clause()`. + /// + /// matches generics = ( ) | ( < > ) | ( < typaramseq ( , )? > ) | ( < lifetimes ( , )? > ) + /// | ( < lifetimes , typaramseq ( , )? > ) + /// where typaramseq = ( typaram ) | ( typaram , typaramseq ) + pub(super) fn parse_generics(&mut self) -> PResult<'a, ast::Generics> { + let span_lo = self.token.span; + let (params, span) = if self.eat_lt() { + let params = self.parse_generic_params()?; + self.expect_gt()?; + (params, span_lo.to(self.prev_token.span)) + } else { + (vec![], self.prev_token.span.shrink_to_hi()) + }; + Ok(ast::Generics { + params, + where_clause: WhereClause { + has_where_token: false, + predicates: Vec::new(), + span: self.prev_token.span.shrink_to_hi(), + }, + span, + }) + } + + /// Parses an optional where-clause and places it in `generics`. 
+ /// + /// ```ignore (only-for-syntax-highlight) + /// where T : Trait<U, V> + 'b, 'a : 'b + /// ``` + pub(super) fn parse_where_clause(&mut self) -> PResult<'a, WhereClause> { + let mut where_clause = WhereClause { + has_where_token: false, + predicates: Vec::new(), + span: self.prev_token.span.shrink_to_hi(), + }; + + if !self.eat_keyword(kw::Where) { + return Ok(where_clause); + } + where_clause.has_where_token = true; + let lo = self.prev_token.span; + + // We are considering adding generics to the `where` keyword as an alternative higher-rank + // parameter syntax (as in `where<'a>` or `where<T>`. To avoid that being a breaking + // change we parse those generics now, but report an error. + if self.choose_generics_over_qpath(0) { + let generics = self.parse_generics()?; + self.struct_span_err( + generics.span, + "generic parameters on `where` clauses are reserved for future use", + ) + .span_label(generics.span, "currently unsupported") + .emit(); + } + + loop { + let lo = self.token.span; + if self.check_lifetime() && self.look_ahead(1, |t| !t.is_like_plus()) { + let lifetime = self.expect_lifetime(); + // Bounds starting with a colon are mandatory, but possibly empty. + self.expect(&token::Colon)?; + let bounds = self.parse_lt_param_bounds(); + where_clause.predicates.push(ast::WherePredicate::RegionPredicate( + ast::WhereRegionPredicate { + span: lo.to(self.prev_token.span), + lifetime, + bounds, + }, + )); + } else if self.check_type() { + where_clause.predicates.push(self.parse_ty_where_predicate()?); + } else { + break; + } + + if !self.eat(&token::Comma) { + break; + } + } + + where_clause.span = lo.to(self.prev_token.span); + Ok(where_clause) + } + + fn parse_ty_where_predicate(&mut self) -> PResult<'a, ast::WherePredicate> { + let lo = self.token.span; + // Parse optional `for<'a, 'b>`. + // This `for` is parsed greedily and applies to the whole predicate, + // the bounded type can have its own `for` applying only to it. 
+ // Examples: + // * `for<'a> Trait1<'a>: Trait2<'a /* ok */>` + // * `(for<'a> Trait1<'a>): Trait2<'a /* not ok */>` + // * `for<'a> for<'b> Trait1<'a, 'b>: Trait2<'a /* ok */, 'b /* not ok */>` + let lifetime_defs = self.parse_late_bound_lifetime_defs()?; + + // Parse type with mandatory colon and (possibly empty) bounds, + // or with mandatory equality sign and the second type. + let ty = self.parse_ty()?; + if self.eat(&token::Colon) { + let bounds = self.parse_generic_bounds(Some(self.prev_token.span))?; + Ok(ast::WherePredicate::BoundPredicate(ast::WhereBoundPredicate { + span: lo.to(self.prev_token.span), + bound_generic_params: lifetime_defs, + bounded_ty: ty, + bounds, + })) + // FIXME: Decide what should be used here, `=` or `==`. + // FIXME: We are just dropping the binders in lifetime_defs on the floor here. + } else if self.eat(&token::Eq) || self.eat(&token::EqEq) { + let rhs_ty = self.parse_ty()?; + Ok(ast::WherePredicate::EqPredicate(ast::WhereEqPredicate { + span: lo.to(self.prev_token.span), + lhs_ty: ty, + rhs_ty, + id: ast::DUMMY_NODE_ID, + })) + } else { + self.unexpected() + } + } + + pub(super) fn choose_generics_over_qpath(&self, start: usize) -> bool { + // There's an ambiguity between generic parameters and qualified paths in impls. + // If we see `<` it may start both, so we have to inspect some following tokens. + // The following combinations can only start generics, + // but not qualified paths (with one exception): + // `<` `>` - empty generic parameters + // `<` `#` - generic parameters with attributes + // `<` (LIFETIME|IDENT) `>` - single generic parameter + // `<` (LIFETIME|IDENT) `,` - first generic parameter in a list + // `<` (LIFETIME|IDENT) `:` - generic parameter with bounds + // `<` (LIFETIME|IDENT) `=` - generic parameter with a default + // `<` const - generic const parameter + // The only truly ambiguous case is + // `<` IDENT `>` `::` IDENT ... 
+ // we disambiguate it in favor of generics (`impl<T> ::absolute::Path<T> { ... }`) + // because this is what almost always expected in practice, qualified paths in impls + // (`impl <Type>::AssocTy { ... }`) aren't even allowed by type checker at the moment. + self.look_ahead(start, |t| t == &token::Lt) + && (self.look_ahead(start + 1, |t| t == &token::Pound || t == &token::Gt) + || self.look_ahead(start + 1, |t| t.is_lifetime() || t.is_ident()) + && self.look_ahead(start + 2, |t| { + matches!(t.kind, token::Gt | token::Comma | token::Colon | token::Eq) + }) + || self.is_keyword_ahead(start + 1, &[kw::Const])) + } +} diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs new file mode 100644 index 00000000000..9143af651df --- /dev/null +++ b/compiler/rustc_parse/src/parser/item.rs @@ -0,0 +1,1843 @@ +use super::diagnostics::{dummy_arg, ConsumeClosingDelim, Error}; +use super::ty::{AllowPlus, RecoverQPath}; +use super::{FollowedByType, Parser, PathStyle}; + +use crate::maybe_whole; + +use rustc_ast::ptr::P; +use rustc_ast::token::{self, TokenKind}; +use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree}; +use rustc_ast::{self as ast, AttrStyle, AttrVec, Attribute, DUMMY_NODE_ID}; +use rustc_ast::{AssocItem, AssocItemKind, ForeignItemKind, Item, ItemKind, Mod}; +use rustc_ast::{Async, Const, Defaultness, IsAuto, Mutability, Unsafe, UseTree, UseTreeKind}; +use rustc_ast::{BindingMode, Block, FnDecl, FnSig, Param, SelfKind}; +use rustc_ast::{EnumDef, Generics, StructField, TraitRef, Ty, TyKind, Variant, VariantData}; +use rustc_ast::{FnHeader, ForeignItem, Path, PathSegment, Visibility, VisibilityKind}; +use rustc_ast::{MacArgs, MacCall, MacDelimiter}; +use rustc_ast_pretty::pprust; +use rustc_errors::{struct_span_err, Applicability, PResult, StashKey}; +use rustc_span::edition::Edition; +use rustc_span::source_map::{self, Span}; +use rustc_span::symbol::{kw, sym, Ident, Symbol}; + +use std::convert::TryFrom; +use 
std::mem; +use tracing::debug; + +impl<'a> Parser<'a> { + /// Parses a source module as a crate. This is the main entry point for the parser. + pub fn parse_crate_mod(&mut self) -> PResult<'a, ast::Crate> { + let lo = self.token.span; + let (module, attrs) = self.parse_mod(&token::Eof)?; + let span = lo.to(self.token.span); + let proc_macros = Vec::new(); // Filled in by `proc_macro_harness::inject()`. + Ok(ast::Crate { attrs, module, span, proc_macros }) + } + + /// Parses a `mod <foo> { ... }` or `mod <foo>;` item. + fn parse_item_mod(&mut self, attrs: &mut Vec<Attribute>) -> PResult<'a, ItemInfo> { + let id = self.parse_ident()?; + let (module, mut inner_attrs) = if self.eat(&token::Semi) { + Default::default() + } else { + self.expect(&token::OpenDelim(token::Brace))?; + self.parse_mod(&token::CloseDelim(token::Brace))? + }; + attrs.append(&mut inner_attrs); + Ok((id, ItemKind::Mod(module))) + } + + /// Parses the contents of a module (inner attributes followed by module items). + pub fn parse_mod(&mut self, term: &TokenKind) -> PResult<'a, (Mod, Vec<Attribute>)> { + let lo = self.token.span; + let attrs = self.parse_inner_attributes()?; + let module = self.parse_mod_items(term, lo)?; + Ok((module, attrs)) + } + + /// Given a termination token, parses all of the items in a module. + fn parse_mod_items(&mut self, term: &TokenKind, inner_lo: Span) -> PResult<'a, Mod> { + let mut items = vec![]; + while let Some(item) = self.parse_item()? 
{ + items.push(item); + self.maybe_consume_incorrect_semicolon(&items); + } + + if !self.eat(term) { + let token_str = super::token_descr(&self.token); + if !self.maybe_consume_incorrect_semicolon(&items) { + let msg = &format!("expected item, found {}", token_str); + let mut err = self.struct_span_err(self.token.span, msg); + err.span_label(self.token.span, "expected item"); + return Err(err); + } + } + + let hi = if self.token.span.is_dummy() { inner_lo } else { self.prev_token.span }; + + Ok(Mod { inner: inner_lo.to(hi), items, inline: true }) + } +} + +pub(super) type ItemInfo = (Ident, ItemKind); + +impl<'a> Parser<'a> { + pub fn parse_item(&mut self) -> PResult<'a, Option<P<Item>>> { + self.parse_item_(|_| true).map(|i| i.map(P)) + } + + fn parse_item_(&mut self, req_name: ReqName) -> PResult<'a, Option<Item>> { + let attrs = self.parse_outer_attributes()?; + self.parse_item_common(attrs, true, false, req_name) + } + + pub(super) fn parse_item_common( + &mut self, + mut attrs: Vec<Attribute>, + mac_allowed: bool, + attrs_allowed: bool, + req_name: ReqName, + ) -> PResult<'a, Option<Item>> { + maybe_whole!(self, NtItem, |item| { + let mut item = item; + mem::swap(&mut item.attrs, &mut attrs); + item.attrs.extend(attrs); + Some(item.into_inner()) + }); + + let mut unclosed_delims = vec![]; + let has_attrs = !attrs.is_empty(); + let parse_item = |this: &mut Self| { + let item = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name); + unclosed_delims.append(&mut this.unclosed_delims); + item + }; + + let (mut item, tokens) = if has_attrs { + let (item, tokens) = self.collect_tokens(parse_item)?; + (item, Some(tokens)) + } else { + (parse_item(self)?, None) + }; + + self.unclosed_delims.append(&mut unclosed_delims); + + // Once we've parsed an item and recorded the tokens we got while + // parsing we may want to store `tokens` into the item we're about to + // return. 
Note, though, that we specifically didn't capture tokens + // related to outer attributes. The `tokens` field here may later be + // used with procedural macros to convert this item back into a token + // stream, but during expansion we may be removing attributes as we go + // along. + // + // If we've got inner attributes then the `tokens` we've got above holds + // these inner attributes. If an inner attribute is expanded we won't + // actually remove it from the token stream, so we'll just keep yielding + // it (bad!). To work around this case for now we just avoid recording + // `tokens` if we detect any inner attributes. This should help keep + // expansion correct, but we should fix this bug one day! + if let Some(tokens) = tokens { + if let Some(item) = &mut item { + if !item.attrs.iter().any(|attr| attr.style == AttrStyle::Inner) { + item.tokens = Some(tokens); + } + } + } + Ok(item) + } + + fn parse_item_common_( + &mut self, + mut attrs: Vec<Attribute>, + mac_allowed: bool, + attrs_allowed: bool, + req_name: ReqName, + ) -> PResult<'a, Option<Item>> { + let lo = self.token.span; + let vis = self.parse_visibility(FollowedByType::No)?; + let mut def = self.parse_defaultness(); + let kind = self.parse_item_kind(&mut attrs, mac_allowed, lo, &vis, &mut def, req_name)?; + if let Some((ident, kind)) = kind { + self.error_on_unconsumed_default(def, &kind); + let span = lo.to(self.prev_token.span); + let id = DUMMY_NODE_ID; + let item = Item { ident, attrs, id, kind, vis, span, tokens: None }; + return Ok(Some(item)); + } + + // At this point, we have failed to parse an item. + self.error_on_unmatched_vis(&vis); + self.error_on_unmatched_defaultness(def); + if !attrs_allowed { + self.recover_attrs_no_item(&attrs)?; + } + Ok(None) + } + + /// Error in-case a non-inherited visibility was parsed but no item followed. 
+ fn error_on_unmatched_vis(&self, vis: &Visibility) { + if let VisibilityKind::Inherited = vis.node { + return; + } + let vs = pprust::vis_to_string(&vis); + let vs = vs.trim_end(); + self.struct_span_err(vis.span, &format!("visibility `{}` is not followed by an item", vs)) + .span_label(vis.span, "the visibility") + .help(&format!("you likely meant to define an item, e.g., `{} fn foo() {{}}`", vs)) + .emit(); + } + + /// Error in-case a `default` was parsed but no item followed. + fn error_on_unmatched_defaultness(&self, def: Defaultness) { + if let Defaultness::Default(sp) = def { + self.struct_span_err(sp, "`default` is not followed by an item") + .span_label(sp, "the `default` qualifier") + .note("only `fn`, `const`, `type`, or `impl` items may be prefixed by `default`") + .emit(); + } + } + + /// Error in-case `default` was parsed in an in-appropriate context. + fn error_on_unconsumed_default(&self, def: Defaultness, kind: &ItemKind) { + if let Defaultness::Default(span) = def { + let msg = format!("{} {} cannot be `default`", kind.article(), kind.descr()); + self.struct_span_err(span, &msg) + .span_label(span, "`default` because of this") + .note("only associated `fn`, `const`, and `type` items can be `default`") + .emit(); + } + } + + /// Parses one of the items allowed by the flags. 
+ fn parse_item_kind( + &mut self, + attrs: &mut Vec<Attribute>, + macros_allowed: bool, + lo: Span, + vis: &Visibility, + def: &mut Defaultness, + req_name: ReqName, + ) -> PResult<'a, Option<ItemInfo>> { + let mut def = || mem::replace(def, Defaultness::Final); + + let info = if self.eat_keyword(kw::Use) { + // USE ITEM + let tree = self.parse_use_tree()?; + self.expect_semi()?; + (Ident::invalid(), ItemKind::Use(P(tree))) + } else if self.check_fn_front_matter() { + // FUNCTION ITEM + let (ident, sig, generics, body) = self.parse_fn(attrs, req_name, lo)?; + (ident, ItemKind::Fn(def(), sig, generics, body)) + } else if self.eat_keyword(kw::Extern) { + if self.eat_keyword(kw::Crate) { + // EXTERN CRATE + self.parse_item_extern_crate()? + } else { + // EXTERN BLOCK + self.parse_item_foreign_mod(attrs)? + } + } else if self.is_static_global() { + // STATIC ITEM + self.bump(); // `static` + let m = self.parse_mutability(); + let (ident, ty, expr) = self.parse_item_global(Some(m))?; + (ident, ItemKind::Static(ty, m, expr)) + } else if let Const::Yes(const_span) = self.parse_constness() { + // CONST ITEM + self.recover_const_mut(const_span); + let (ident, ty, expr) = self.parse_item_global(None)?; + (ident, ItemKind::Const(def(), ty, expr)) + } else if self.check_keyword(kw::Trait) || self.check_auto_or_unsafe_trait_item() { + // TRAIT ITEM + self.parse_item_trait(attrs, lo)? + } else if self.check_keyword(kw::Impl) + || self.check_keyword(kw::Unsafe) && self.is_keyword_ahead(1, &[kw::Impl]) + { + // IMPL ITEM + self.parse_item_impl(attrs, def())? + } else if self.eat_keyword(kw::Mod) { + // MODULE ITEM + self.parse_item_mod(attrs)? + } else if self.eat_keyword(kw::Type) { + // TYPE ITEM + self.parse_type_alias(def())? + } else if self.eat_keyword(kw::Enum) { + // ENUM ITEM + self.parse_item_enum()? + } else if self.eat_keyword(kw::Struct) { + // STRUCT ITEM + self.parse_item_struct()? 
+ } else if self.is_kw_followed_by_ident(kw::Union) { + // UNION ITEM + self.bump(); // `union` + self.parse_item_union()? + } else if self.eat_keyword(kw::Macro) { + // MACROS 2.0 ITEM + self.parse_item_decl_macro(lo)? + } else if self.is_macro_rules_item() { + // MACRO_RULES ITEM + self.parse_item_macro_rules(vis)? + } else if vis.node.is_pub() && self.isnt_macro_invocation() { + self.recover_missing_kw_before_item()?; + return Ok(None); + } else if macros_allowed && self.check_path() { + // MACRO INVOCATION ITEM + (Ident::invalid(), ItemKind::MacCall(self.parse_item_macro(vis)?)) + } else { + return Ok(None); + }; + Ok(Some(info)) + } + + /// When parsing a statement, would the start of a path be an item? + pub(super) fn is_path_start_item(&mut self) -> bool { + self.is_crate_vis() // no: `crate::b`, yes: `crate $item` + || self.is_kw_followed_by_ident(kw::Union) // no: `union::b`, yes: `union U { .. }` + || self.check_auto_or_unsafe_trait_item() // no: `auto::b`, yes: `auto trait X { .. }` + || self.is_async_fn() // no(2015): `async::b`, yes: `async fn` + || self.is_macro_rules_item() // no: `macro_rules::b`, yes: `macro_rules! mac` + } + + /// Are we sure this could not possibly be a macro invocation? + fn isnt_macro_invocation(&mut self) -> bool { + self.check_ident() && self.look_ahead(1, |t| *t != token::Not && *t != token::ModSep) + } + + /// Recover on encountering a struct or method definition where the user + /// forgot to add the `struct` or `fn` keyword after writing `pub`: `pub S {}`. 
+ fn recover_missing_kw_before_item(&mut self) -> PResult<'a, ()> { + // Space between `pub` keyword and the identifier + // + // pub S {} + // ^^^ `sp` points here + let sp = self.prev_token.span.between(self.token.span); + let full_sp = self.prev_token.span.to(self.token.span); + let ident_sp = self.token.span; + if self.look_ahead(1, |t| *t == token::OpenDelim(token::Brace)) { + // possible public struct definition where `struct` was forgotten + let ident = self.parse_ident().unwrap(); + let msg = format!("add `struct` here to parse `{}` as a public struct", ident); + let mut err = self.struct_span_err(sp, "missing `struct` for struct definition"); + err.span_suggestion_short( + sp, + &msg, + " struct ".into(), + Applicability::MaybeIncorrect, // speculative + ); + Err(err) + } else if self.look_ahead(1, |t| *t == token::OpenDelim(token::Paren)) { + let ident = self.parse_ident().unwrap(); + self.bump(); // `(` + let kw_name = self.recover_first_param(); + self.consume_block(token::Paren, ConsumeClosingDelim::Yes); + let (kw, kw_name, ambiguous) = if self.check(&token::RArrow) { + self.eat_to_tokens(&[&token::OpenDelim(token::Brace)]); + self.bump(); // `{` + ("fn", kw_name, false) + } else if self.check(&token::OpenDelim(token::Brace)) { + self.bump(); // `{` + ("fn", kw_name, false) + } else if self.check(&token::Colon) { + let kw = "struct"; + (kw, kw, false) + } else { + ("fn` or `struct", "function or struct", true) + }; + + let msg = format!("missing `{}` for {} definition", kw, kw_name); + let mut err = self.struct_span_err(sp, &msg); + if !ambiguous { + self.consume_block(token::Brace, ConsumeClosingDelim::Yes); + let suggestion = + format!("add `{}` here to parse `{}` as a public {}", kw, ident, kw_name); + err.span_suggestion_short( + sp, + &suggestion, + format!(" {} ", kw), + Applicability::MachineApplicable, + ); + } else { + if let Ok(snippet) = self.span_to_snippet(ident_sp) { + err.span_suggestion( + full_sp, + "if you meant to call a macro, 
try", + format!("{}!", snippet), + // this is the `ambiguous` conditional branch + Applicability::MaybeIncorrect, + ); + } else { + err.help( + "if you meant to call a macro, remove the `pub` \ + and add a trailing `!` after the identifier", + ); + } + } + Err(err) + } else if self.look_ahead(1, |t| *t == token::Lt) { + let ident = self.parse_ident().unwrap(); + self.eat_to_tokens(&[&token::Gt]); + self.bump(); // `>` + let (kw, kw_name, ambiguous) = if self.eat(&token::OpenDelim(token::Paren)) { + ("fn", self.recover_first_param(), false) + } else if self.check(&token::OpenDelim(token::Brace)) { + ("struct", "struct", false) + } else { + ("fn` or `struct", "function or struct", true) + }; + let msg = format!("missing `{}` for {} definition", kw, kw_name); + let mut err = self.struct_span_err(sp, &msg); + if !ambiguous { + err.span_suggestion_short( + sp, + &format!("add `{}` here to parse `{}` as a public {}", kw, ident, kw_name), + format!(" {} ", kw), + Applicability::MachineApplicable, + ); + } + Err(err) + } else { + Ok(()) + } + } + + /// Parses an item macro, e.g., `item!();`. + fn parse_item_macro(&mut self, vis: &Visibility) -> PResult<'a, MacCall> { + let path = self.parse_path(PathStyle::Mod)?; // `foo::bar` + self.expect(&token::Not)?; // `!` + let args = self.parse_mac_args()?; // `( .. )` or `[ .. ]` (followed by `;`), or `{ .. }`. + self.eat_semi_for_macro_if_needed(&args); + self.complain_if_pub_macro(vis, false); + Ok(MacCall { path, args, prior_type_ascription: self.last_type_ascription }) + } + + /// Recover if we parsed attributes and expected an item but there was none. 
+ fn recover_attrs_no_item(&mut self, attrs: &[Attribute]) -> PResult<'a, ()> { + let (start, end) = match attrs { + [] => return Ok(()), + [x0 @ xn] | [x0, .., xn] => (x0, xn), + }; + let msg = if end.is_doc_comment() { + "expected item after doc comment" + } else { + "expected item after attributes" + }; + let mut err = self.struct_span_err(end.span, msg); + if end.is_doc_comment() { + err.span_label(end.span, "this doc comment doesn't document anything"); + } + if let [.., penultimate, _] = attrs { + err.span_label(start.span.to(penultimate.span), "other attributes here"); + } + Err(err) + } + + fn is_async_fn(&self) -> bool { + self.token.is_keyword(kw::Async) && self.is_keyword_ahead(1, &[kw::Fn]) + } + + fn parse_polarity(&mut self) -> ast::ImplPolarity { + // Disambiguate `impl !Trait for Type { ... }` and `impl ! { ... }` for the never type. + if self.check(&token::Not) && self.look_ahead(1, |t| t.can_begin_type()) { + self.bump(); // `!` + ast::ImplPolarity::Negative(self.prev_token.span) + } else { + ast::ImplPolarity::Positive + } + } + + /// Parses an implementation item. + /// + /// ``` + /// impl<'a, T> TYPE { /* impl items */ } + /// impl<'a, T> TRAIT for TYPE { /* impl items */ } + /// impl<'a, T> !TRAIT for TYPE { /* impl items */ } + /// impl<'a, T> const TRAIT for TYPE { /* impl items */ } + /// ``` + /// + /// We actually parse slightly more relaxed grammar for better error reporting and recovery. + /// ``` + /// "impl" GENERICS "const"? "!"? TYPE "for"? (TYPE | "..") ("where" PREDICATES)? "{" BODY "}" + /// "impl" GENERICS "const"? "!"? TYPE ("where" PREDICATES)? "{" BODY "}" + /// ``` + fn parse_item_impl( + &mut self, + attrs: &mut Vec<Attribute>, + defaultness: Defaultness, + ) -> PResult<'a, ItemInfo> { + let unsafety = self.parse_unsafety(); + self.expect_keyword(kw::Impl)?; + + // First, parse generic parameters if necessary. + let mut generics = if self.choose_generics_over_qpath(0) { + self.parse_generics()? 
+ } else { + let mut generics = Generics::default(); + // impl A for B {} + // /\ this is where `generics.span` should point when there are no type params. + generics.span = self.prev_token.span.shrink_to_hi(); + generics + }; + + let constness = self.parse_constness(); + if let Const::Yes(span) = constness { + self.sess.gated_spans.gate(sym::const_trait_impl, span); + } + + let polarity = self.parse_polarity(); + + // Parse both types and traits as a type, then reinterpret if necessary. + let err_path = |span| ast::Path::from_ident(Ident::new(kw::Invalid, span)); + let ty_first = if self.token.is_keyword(kw::For) && self.look_ahead(1, |t| t != &token::Lt) + { + let span = self.prev_token.span.between(self.token.span); + self.struct_span_err(span, "missing trait in a trait impl").emit(); + P(Ty { kind: TyKind::Path(None, err_path(span)), span, id: DUMMY_NODE_ID }) + } else { + self.parse_ty()? + }; + + // If `for` is missing we try to recover. + let has_for = self.eat_keyword(kw::For); + let missing_for_span = self.prev_token.span.between(self.token.span); + + let ty_second = if self.token == token::DotDot { + // We need to report this error after `cfg` expansion for compatibility reasons + self.bump(); // `..`, do not add it to expected tokens + Some(self.mk_ty(self.prev_token.span, TyKind::Err)) + } else if has_for || self.token.can_begin_type() { + Some(self.parse_ty()?) 
+ } else { + None + }; + + generics.where_clause = self.parse_where_clause()?; + + let impl_items = self.parse_item_list(attrs, |p| p.parse_impl_item())?; + + let item_kind = match ty_second { + Some(ty_second) => { + // impl Trait for Type + if !has_for { + self.struct_span_err(missing_for_span, "missing `for` in a trait impl") + .span_suggestion_short( + missing_for_span, + "add `for` here", + " for ".to_string(), + Applicability::MachineApplicable, + ) + .emit(); + } + + let ty_first = ty_first.into_inner(); + let path = match ty_first.kind { + // This notably includes paths passed through `ty` macro fragments (#46438). + TyKind::Path(None, path) => path, + _ => { + self.struct_span_err(ty_first.span, "expected a trait, found type").emit(); + err_path(ty_first.span) + } + }; + let trait_ref = TraitRef { path, ref_id: ty_first.id }; + + ItemKind::Impl { + unsafety, + polarity, + defaultness, + constness, + generics, + of_trait: Some(trait_ref), + self_ty: ty_second, + items: impl_items, + } + } + None => { + // impl Type + ItemKind::Impl { + unsafety, + polarity, + defaultness, + constness, + generics, + of_trait: None, + self_ty: ty_first, + items: impl_items, + } + } + }; + + Ok((Ident::invalid(), item_kind)) + } + + fn parse_item_list<T>( + &mut self, + attrs: &mut Vec<Attribute>, + mut parse_item: impl FnMut(&mut Parser<'a>) -> PResult<'a, Option<Option<T>>>, + ) -> PResult<'a, Vec<T>> { + let open_brace_span = self.token.span; + self.expect(&token::OpenDelim(token::Brace))?; + attrs.append(&mut self.parse_inner_attributes()?); + + let mut items = Vec::new(); + while !self.eat(&token::CloseDelim(token::Brace)) { + if self.recover_doc_comment_before_brace() { + continue; + } + match parse_item(self) { + Ok(None) => { + // We have to bail or we'll potentially never make progress. 
+ let non_item_span = self.token.span; + self.consume_block(token::Brace, ConsumeClosingDelim::Yes); + self.struct_span_err(non_item_span, "non-item in item list") + .span_label(open_brace_span, "item list starts here") + .span_label(non_item_span, "non-item starts here") + .span_label(self.prev_token.span, "item list ends here") + .emit(); + break; + } + Ok(Some(item)) => items.extend(item), + Err(mut err) => { + self.consume_block(token::Brace, ConsumeClosingDelim::Yes); + err.span_label(open_brace_span, "while parsing this item list starting here") + .span_label(self.prev_token.span, "the item list ends here") + .emit(); + break; + } + } + } + Ok(items) + } + + /// Recover on a doc comment before `}`. + fn recover_doc_comment_before_brace(&mut self) -> bool { + if let token::DocComment(..) = self.token.kind { + if self.look_ahead(1, |tok| tok == &token::CloseDelim(token::Brace)) { + struct_span_err!( + self.diagnostic(), + self.token.span, + E0584, + "found a documentation comment that doesn't document anything", + ) + .span_label(self.token.span, "this doc comment doesn't document anything") + .help( + "doc comments must come before what they document, maybe a \ + comment was intended with `//`?", + ) + .emit(); + self.bump(); + return true; + } + } + false + } + + /// Parses defaultness (i.e., `default` or nothing). + fn parse_defaultness(&mut self) -> Defaultness { + // We are interested in `default` followed by another identifier. + // However, we must avoid keywords that occur as binary operators. + // Currently, the only applicable keyword is `as` (`default as Ty`). + if self.check_keyword(kw::Default) + && self.look_ahead(1, |t| t.is_non_raw_ident_where(|i| i.name != kw::As)) + { + self.bump(); // `default` + Defaultness::Default(self.prev_token.uninterpolated_span()) + } else { + Defaultness::Final + } + } + + /// Is this an `(unsafe auto? | auto) trait` item? 
+ fn check_auto_or_unsafe_trait_item(&mut self) -> bool { + // auto trait + self.check_keyword(kw::Auto) && self.is_keyword_ahead(1, &[kw::Trait]) + // unsafe auto trait + || self.check_keyword(kw::Unsafe) && self.is_keyword_ahead(1, &[kw::Trait, kw::Auto]) + } + + /// Parses `unsafe? auto? trait Foo { ... }` or `trait Foo = Bar;`. + fn parse_item_trait(&mut self, attrs: &mut Vec<Attribute>, lo: Span) -> PResult<'a, ItemInfo> { + let unsafety = self.parse_unsafety(); + // Parse optional `auto` prefix. + let is_auto = if self.eat_keyword(kw::Auto) { IsAuto::Yes } else { IsAuto::No }; + + self.expect_keyword(kw::Trait)?; + let ident = self.parse_ident()?; + let mut tps = self.parse_generics()?; + + // Parse optional colon and supertrait bounds. + let had_colon = self.eat(&token::Colon); + let span_at_colon = self.prev_token.span; + let bounds = if had_colon { + self.parse_generic_bounds(Some(self.prev_token.span))? + } else { + Vec::new() + }; + + let span_before_eq = self.prev_token.span; + if self.eat(&token::Eq) { + // It's a trait alias. + if had_colon { + let span = span_at_colon.to(span_before_eq); + self.struct_span_err(span, "bounds are not allowed on trait aliases").emit(); + } + + let bounds = self.parse_generic_bounds(None)?; + tps.where_clause = self.parse_where_clause()?; + self.expect_semi()?; + + let whole_span = lo.to(self.prev_token.span); + if is_auto == IsAuto::Yes { + let msg = "trait aliases cannot be `auto`"; + self.struct_span_err(whole_span, msg).span_label(whole_span, msg).emit(); + } + if let Unsafe::Yes(_) = unsafety { + let msg = "trait aliases cannot be `unsafe`"; + self.struct_span_err(whole_span, msg).span_label(whole_span, msg).emit(); + } + + self.sess.gated_spans.gate(sym::trait_alias, whole_span); + + Ok((ident, ItemKind::TraitAlias(tps, bounds))) + } else { + // It's a normal trait. 
+ tps.where_clause = self.parse_where_clause()?; + let items = self.parse_item_list(attrs, |p| p.parse_trait_item())?; + Ok((ident, ItemKind::Trait(is_auto, unsafety, tps, bounds, items))) + } + } + + pub fn parse_impl_item(&mut self) -> PResult<'a, Option<Option<P<AssocItem>>>> { + self.parse_assoc_item(|_| true) + } + + pub fn parse_trait_item(&mut self) -> PResult<'a, Option<Option<P<AssocItem>>>> { + self.parse_assoc_item(|edition| edition >= Edition::Edition2018) + } + + /// Parses associated items. + fn parse_assoc_item(&mut self, req_name: ReqName) -> PResult<'a, Option<Option<P<AssocItem>>>> { + Ok(self.parse_item_(req_name)?.map(|Item { attrs, id, span, vis, ident, kind, tokens }| { + let kind = match AssocItemKind::try_from(kind) { + Ok(kind) => kind, + Err(kind) => match kind { + ItemKind::Static(a, _, b) => { + self.struct_span_err(span, "associated `static` items are not allowed") + .emit(); + AssocItemKind::Const(Defaultness::Final, a, b) + } + _ => return self.error_bad_item_kind(span, &kind, "`trait`s or `impl`s"), + }, + }; + Some(P(Item { attrs, id, span, vis, ident, kind, tokens })) + })) + } + + /// Parses a `type` alias with the following grammar: + /// ``` + /// TypeAlias = "type" Ident Generics {":" GenericBounds}? {"=" Ty}? ";" ; + /// ``` + /// The `"type"` has already been eaten. + fn parse_type_alias(&mut self, def: Defaultness) -> PResult<'a, ItemInfo> { + let ident = self.parse_ident()?; + let mut generics = self.parse_generics()?; + + // Parse optional colon and param bounds. + let bounds = + if self.eat(&token::Colon) { self.parse_generic_bounds(None)? } else { Vec::new() }; + generics.where_clause = self.parse_where_clause()?; + + let default = if self.eat(&token::Eq) { Some(self.parse_ty()?) } else { None }; + self.expect_semi()?; + + Ok((ident, ItemKind::TyAlias(def, generics, bounds, default))) + } + + /// Parses a `UseTree`. 
+ /// + /// ```text + /// USE_TREE = [`::`] `*` | + /// [`::`] `{` USE_TREE_LIST `}` | + /// PATH `::` `*` | + /// PATH `::` `{` USE_TREE_LIST `}` | + /// PATH [`as` IDENT] + /// ``` + fn parse_use_tree(&mut self) -> PResult<'a, UseTree> { + let lo = self.token.span; + + let mut prefix = ast::Path { segments: Vec::new(), span: lo.shrink_to_lo() }; + let kind = if self.check(&token::OpenDelim(token::Brace)) + || self.check(&token::BinOp(token::Star)) + || self.is_import_coupler() + { + // `use *;` or `use ::*;` or `use {...};` or `use ::{...};` + let mod_sep_ctxt = self.token.span.ctxt(); + if self.eat(&token::ModSep) { + prefix + .segments + .push(PathSegment::path_root(lo.shrink_to_lo().with_ctxt(mod_sep_ctxt))); + } + + self.parse_use_tree_glob_or_nested()? + } else { + // `use path::*;` or `use path::{...};` or `use path;` or `use path as bar;` + prefix = self.parse_path(PathStyle::Mod)?; + + if self.eat(&token::ModSep) { + self.parse_use_tree_glob_or_nested()? + } else { + UseTreeKind::Simple(self.parse_rename()?, DUMMY_NODE_ID, DUMMY_NODE_ID) + } + }; + + Ok(UseTree { prefix, kind, span: lo.to(self.prev_token.span) }) + } + + /// Parses `*` or `{...}`. + fn parse_use_tree_glob_or_nested(&mut self) -> PResult<'a, UseTreeKind> { + Ok(if self.eat(&token::BinOp(token::Star)) { + UseTreeKind::Glob + } else { + UseTreeKind::Nested(self.parse_use_tree_list()?) + }) + } + + /// Parses a `UseTreeKind::Nested(list)`. 
+ /// + /// ```text + /// USE_TREE_LIST = Ø | (USE_TREE `,`)* USE_TREE [`,`] + /// ``` + fn parse_use_tree_list(&mut self) -> PResult<'a, Vec<(UseTree, ast::NodeId)>> { + self.parse_delim_comma_seq(token::Brace, |p| Ok((p.parse_use_tree()?, DUMMY_NODE_ID))) + .map(|(r, _)| r) + } + + fn parse_rename(&mut self) -> PResult<'a, Option<Ident>> { + if self.eat_keyword(kw::As) { self.parse_ident_or_underscore().map(Some) } else { Ok(None) } + } + + fn parse_ident_or_underscore(&mut self) -> PResult<'a, Ident> { + match self.token.ident() { + Some((ident @ Ident { name: kw::Underscore, .. }, false)) => { + self.bump(); + Ok(ident) + } + _ => self.parse_ident(), + } + } + + /// Parses `extern crate` links. + /// + /// # Examples + /// + /// ``` + /// extern crate foo; + /// extern crate bar as foo; + /// ``` + fn parse_item_extern_crate(&mut self) -> PResult<'a, ItemInfo> { + // Accept `extern crate name-like-this` for better diagnostics + let orig_name = self.parse_crate_name_with_dashes()?; + let (item_name, orig_name) = if let Some(rename) = self.parse_rename()? { + (rename, Some(orig_name.name)) + } else { + (orig_name, None) + }; + self.expect_semi()?; + Ok((item_name, ItemKind::ExternCrate(orig_name))) + } + + fn parse_crate_name_with_dashes(&mut self) -> PResult<'a, Ident> { + let error_msg = "crate name using dashes are not valid in `extern crate` statements"; + let suggestion_msg = "if the original crate name uses dashes you need to use underscores \ + in the code"; + let mut ident = if self.token.is_keyword(kw::SelfLower) { + self.parse_path_segment_ident() + } else { + self.parse_ident() + }?; + let mut idents = vec![]; + let mut replacement = vec![]; + let mut fixed_crate_name = false; + // Accept `extern crate name-like-this` for better diagnostics. + let dash = token::BinOp(token::BinOpToken::Minus); + if self.token == dash { + // Do not include `-` as part of the expected tokens list. 
+ while self.eat(&dash) { + fixed_crate_name = true; + replacement.push((self.prev_token.span, "_".to_string())); + idents.push(self.parse_ident()?); + } + } + if fixed_crate_name { + let fixed_name_sp = ident.span.to(idents.last().unwrap().span); + let mut fixed_name = format!("{}", ident.name); + for part in idents { + fixed_name.push_str(&format!("_{}", part.name)); + } + ident = Ident::from_str_and_span(&fixed_name, fixed_name_sp); + + self.struct_span_err(fixed_name_sp, error_msg) + .span_label(fixed_name_sp, "dash-separated idents are not valid") + .multipart_suggestion(suggestion_msg, replacement, Applicability::MachineApplicable) + .emit(); + } + Ok(ident) + } + + /// Parses `extern` for foreign ABIs modules. + /// + /// `extern` is expected to have been consumed before calling this method. + /// + /// # Examples + /// + /// ```ignore (only-for-syntax-highlight) + /// extern "C" {} + /// extern {} + /// ``` + fn parse_item_foreign_mod(&mut self, attrs: &mut Vec<Attribute>) -> PResult<'a, ItemInfo> { + let abi = self.parse_abi(); // ABI? + let items = self.parse_item_list(attrs, |p| p.parse_foreign_item())?; + let module = ast::ForeignMod { abi, items }; + Ok((Ident::invalid(), ItemKind::ForeignMod(module))) + } + + /// Parses a foreign item (one in an `extern { ... }` block). 
+ pub fn parse_foreign_item(&mut self) -> PResult<'a, Option<Option<P<ForeignItem>>>> { + Ok(self.parse_item_(|_| true)?.map(|Item { attrs, id, span, vis, ident, kind, tokens }| { + let kind = match ForeignItemKind::try_from(kind) { + Ok(kind) => kind, + Err(kind) => match kind { + ItemKind::Const(_, a, b) => { + self.error_on_foreign_const(span, ident); + ForeignItemKind::Static(a, Mutability::Not, b) + } + _ => return self.error_bad_item_kind(span, &kind, "`extern` blocks"), + }, + }; + Some(P(Item { attrs, id, span, vis, ident, kind, tokens })) + })) + } + + fn error_bad_item_kind<T>(&self, span: Span, kind: &ItemKind, ctx: &str) -> Option<T> { + let span = self.sess.source_map().guess_head_span(span); + let descr = kind.descr(); + self.struct_span_err(span, &format!("{} is not supported in {}", descr, ctx)) + .help(&format!("consider moving the {} out to a nearby module scope", descr)) + .emit(); + None + } + + fn error_on_foreign_const(&self, span: Span, ident: Ident) { + self.struct_span_err(ident.span, "extern items cannot be `const`") + .span_suggestion( + span.with_hi(ident.span.lo()), + "try using a static value", + "static ".to_string(), + Applicability::MachineApplicable, + ) + .note("for more information, visit https://doc.rust-lang.org/std/keyword.extern.html") + .emit(); + } + + fn is_static_global(&mut self) -> bool { + if self.check_keyword(kw::Static) { + // Check if this could be a closure. + !self.look_ahead(1, |token| { + if token.is_keyword(kw::Move) { + return true; + } + match token.kind { + token::BinOp(token::Or) | token::OrOr => true, + _ => false, + } + }) + } else { + false + } + } + + /// Recover on `const mut` with `const` already eaten. 
+ fn recover_const_mut(&mut self, const_span: Span) { + if self.eat_keyword(kw::Mut) { + let span = self.prev_token.span; + self.struct_span_err(span, "const globals cannot be mutable") + .span_label(span, "cannot be mutable") + .span_suggestion( + const_span, + "you might want to declare a static instead", + "static".to_owned(), + Applicability::MaybeIncorrect, + ) + .emit(); + } + } + + /// Parse `["const" | ("static" "mut"?)] $ident ":" $ty (= $expr)?` with + /// `["const" | ("static" "mut"?)]` already parsed and stored in `m`. + /// + /// When `m` is `"const"`, `$ident` may also be `"_"`. + fn parse_item_global( + &mut self, + m: Option<Mutability>, + ) -> PResult<'a, (Ident, P<Ty>, Option<P<ast::Expr>>)> { + let id = if m.is_none() { self.parse_ident_or_underscore() } else { self.parse_ident() }?; + + // Parse the type of a `const` or `static mut?` item. + // That is, the `":" $ty` fragment. + let ty = if self.eat(&token::Colon) { + self.parse_ty()? + } else { + self.recover_missing_const_type(id, m) + }; + + let expr = if self.eat(&token::Eq) { Some(self.parse_expr()?) } else { None }; + self.expect_semi()?; + Ok((id, ty, expr)) + } + + /// We were supposed to parse `:` but the `:` was missing. + /// This means that the type is missing. + fn recover_missing_const_type(&mut self, id: Ident, m: Option<Mutability>) -> P<Ty> { + // Construct the error and stash it away with the hope + // that typeck will later enrich the error with a type. + let kind = match m { + Some(Mutability::Mut) => "static mut", + Some(Mutability::Not) => "static", + None => "const", + }; + let mut err = self.struct_span_err(id.span, &format!("missing type for `{}` item", kind)); + err.span_suggestion( + id.span, + "provide a type for the item", + format!("{}: <type>", id), + Applicability::HasPlaceholders, + ); + err.stash(id.span, StashKey::ItemNoType); + + // The user intended that the type be inferred, + // so treat this as if the user wrote e.g. `const A: _ = expr;`. 
+ P(Ty { kind: TyKind::Infer, span: id.span, id: ast::DUMMY_NODE_ID }) + } + + /// Parses an enum declaration. + fn parse_item_enum(&mut self) -> PResult<'a, ItemInfo> { + let id = self.parse_ident()?; + let mut generics = self.parse_generics()?; + generics.where_clause = self.parse_where_clause()?; + + let (variants, _) = + self.parse_delim_comma_seq(token::Brace, |p| p.parse_enum_variant()).map_err(|e| { + self.recover_stmt(); + e + })?; + + let enum_definition = + EnumDef { variants: variants.into_iter().filter_map(|v| v).collect() }; + Ok((id, ItemKind::Enum(enum_definition, generics))) + } + + fn parse_enum_variant(&mut self) -> PResult<'a, Option<Variant>> { + let variant_attrs = self.parse_outer_attributes()?; + let vlo = self.token.span; + + let vis = self.parse_visibility(FollowedByType::No)?; + if !self.recover_nested_adt_item(kw::Enum)? { + return Ok(None); + } + let ident = self.parse_ident()?; + + let struct_def = if self.check(&token::OpenDelim(token::Brace)) { + // Parse a struct variant. + let (fields, recovered) = self.parse_record_struct_body()?; + VariantData::Struct(fields, recovered) + } else if self.check(&token::OpenDelim(token::Paren)) { + VariantData::Tuple(self.parse_tuple_struct_body()?, DUMMY_NODE_ID) + } else { + VariantData::Unit(DUMMY_NODE_ID) + }; + + let disr_expr = + if self.eat(&token::Eq) { Some(self.parse_anon_const_expr()?) } else { None }; + + let vr = ast::Variant { + ident, + vis, + id: DUMMY_NODE_ID, + attrs: variant_attrs, + data: struct_def, + disr_expr, + span: vlo.to(self.prev_token.span), + is_placeholder: false, + }; + + Ok(Some(vr)) + } + + /// Parses `struct Foo { ... }`. + fn parse_item_struct(&mut self) -> PResult<'a, ItemInfo> { + let class_name = self.parse_ident()?; + + let mut generics = self.parse_generics()?; + + // There is a special case worth noting here, as reported in issue #17904. + // If we are parsing a tuple struct it is the case that the where clause + // should follow the field list. 
Like so: + // + // struct Foo<T>(T) where T: Copy; + // + // If we are parsing a normal record-style struct it is the case + // that the where clause comes before the body, and after the generics. + // So if we look ahead and see a brace or a where-clause we begin + // parsing a record style struct. + // + // Otherwise if we look ahead and see a paren we parse a tuple-style + // struct. + + let vdata = if self.token.is_keyword(kw::Where) { + generics.where_clause = self.parse_where_clause()?; + if self.eat(&token::Semi) { + // If we see a: `struct Foo<T> where T: Copy;` style decl. + VariantData::Unit(DUMMY_NODE_ID) + } else { + // If we see: `struct Foo<T> where T: Copy { ... }` + let (fields, recovered) = self.parse_record_struct_body()?; + VariantData::Struct(fields, recovered) + } + // No `where` so: `struct Foo<T>;` + } else if self.eat(&token::Semi) { + VariantData::Unit(DUMMY_NODE_ID) + // Record-style struct definition + } else if self.token == token::OpenDelim(token::Brace) { + let (fields, recovered) = self.parse_record_struct_body()?; + VariantData::Struct(fields, recovered) + // Tuple-style struct definition with optional where-clause. + } else if self.token == token::OpenDelim(token::Paren) { + let body = VariantData::Tuple(self.parse_tuple_struct_body()?, DUMMY_NODE_ID); + generics.where_clause = self.parse_where_clause()?; + self.expect_semi()?; + body + } else { + let token_str = super::token_descr(&self.token); + let msg = &format!( + "expected `where`, `{{`, `(`, or `;` after struct name, found {}", + token_str + ); + let mut err = self.struct_span_err(self.token.span, msg); + err.span_label(self.token.span, "expected `where`, `{`, `(`, or `;` after struct name"); + return Err(err); + }; + + Ok((class_name, ItemKind::Struct(vdata, generics))) + } + + /// Parses `union Foo { ... }`. 
+ fn parse_item_union(&mut self) -> PResult<'a, ItemInfo> { + let class_name = self.parse_ident()?; + + let mut generics = self.parse_generics()?; + + let vdata = if self.token.is_keyword(kw::Where) { + generics.where_clause = self.parse_where_clause()?; + let (fields, recovered) = self.parse_record_struct_body()?; + VariantData::Struct(fields, recovered) + } else if self.token == token::OpenDelim(token::Brace) { + let (fields, recovered) = self.parse_record_struct_body()?; + VariantData::Struct(fields, recovered) + } else { + let token_str = super::token_descr(&self.token); + let msg = &format!("expected `where` or `{{` after union name, found {}", token_str); + let mut err = self.struct_span_err(self.token.span, msg); + err.span_label(self.token.span, "expected `where` or `{` after union name"); + return Err(err); + }; + + Ok((class_name, ItemKind::Union(vdata, generics))) + } + + fn parse_record_struct_body( + &mut self, + ) -> PResult<'a, (Vec<StructField>, /* recovered */ bool)> { + let mut fields = Vec::new(); + let mut recovered = false; + if self.eat(&token::OpenDelim(token::Brace)) { + while self.token != token::CloseDelim(token::Brace) { + let field = self.parse_struct_decl_field().map_err(|e| { + self.consume_block(token::Brace, ConsumeClosingDelim::No); + recovered = true; + e + }); + match field { + Ok(field) => fields.push(field), + Err(mut err) => { + err.emit(); + break; + } + } + } + self.eat(&token::CloseDelim(token::Brace)); + } else { + let token_str = super::token_descr(&self.token); + let msg = &format!("expected `where`, or `{{` after struct name, found {}", token_str); + let mut err = self.struct_span_err(self.token.span, msg); + err.span_label(self.token.span, "expected `where`, or `{` after struct name"); + return Err(err); + } + + Ok((fields, recovered)) + } + + fn parse_tuple_struct_body(&mut self) -> PResult<'a, Vec<StructField>> { + // This is the case where we find `struct Foo<T>(T) where T: Copy;` + // Unit like structs are handled 
    /// Parses a structure field declaration.
    ///
    /// `lo`, `vis` and `attrs` have already been parsed by the caller
    /// (see `parse_struct_decl_field`); this parses `name: Type` plus the
    /// trailing `,`/`}` and performs several recoveries for malformed input.
    ///
    /// Errors if the field is followed by a doc comment without a comma, or
    /// by an unexpected token that cannot be recovered from.
    fn parse_single_struct_field(
        &mut self,
        lo: Span,
        vis: Visibility,
        attrs: Vec<Attribute>,
    ) -> PResult<'a, StructField> {
        // Tracks whether a `,` has already been observed after the field.
        let mut seen_comma: bool = false;
        let a_var = self.parse_name_and_ty(lo, vis, attrs)?;
        if self.token == token::Comma {
            seen_comma = true;
        }
        // Decide how the field is terminated.
        match self.token.kind {
            token::Comma => {
                self.bump();
            }
            // `}` ends the struct body; nothing to consume.
            token::CloseDelim(token::Brace) => {}
            token::DocComment(..) => {
                // A doc comment *after* a field documents nothing — diagnose it,
                // but try to keep parsing if the field list plausibly continues.
                let previous_span = self.prev_token.span;
                let mut err = self.span_fatal_err(self.token.span, Error::UselessDocComment);
                self.bump(); // consume the doc comment
                let comma_after_doc_seen = self.eat(&token::Comma);
                // `seen_comma` is always false, because we are inside doc block
                // condition is here to make code more readable
                if !seen_comma && comma_after_doc_seen {
                    seen_comma = true;
                }
                if comma_after_doc_seen || self.token == token::CloseDelim(token::Brace) {
                    // The list continues (or closes) normally: emit and go on.
                    err.emit();
                } else {
                    if !seen_comma {
                        // Suggest inserting the missing comma right after the field.
                        let sp = self.sess.source_map().next_point(previous_span);
                        err.span_suggestion(
                            sp,
                            "missing comma here",
                            ",".into(),
                            Applicability::MachineApplicable,
                        );
                    }
                    return Err(err);
                }
            }
            _ => {
                let sp = self.prev_token.span.shrink_to_hi();
                let mut err = self.struct_span_err(
                    sp,
                    &format!("expected `,`, or `}}`, found {}", super::token_descr(&self.token)),
                );

                // Try to recover extra trailing angle brackets
                let mut recovered = false;
                if let TyKind::Path(_, Path { segments, .. }) = &a_var.ty.kind {
                    if let Some(last_segment) = segments.last() {
                        recovered = self.check_trailing_angle_brackets(
                            last_segment,
                            &[&token::Comma, &token::CloseDelim(token::Brace)],
                        );
                        if recovered {
                            // Handle a case like `Vec<u8>>,` where we can continue parsing fields
                            // after the comma
                            self.eat(&token::Comma);
                            // `check_trailing_angle_brackets` already emitted a nicer error
                            err.cancel();
                        }
                    }
                }

                if self.token.is_ident() {
                    // This is likely another field; emit the diagnostic and keep going
                    err.span_suggestion(
                        sp,
                        "try adding a comma",
                        ",".into(),
                        Applicability::MachineApplicable,
                    );
                    err.emit();
                    recovered = true;
                }

                if recovered {
                    // Make sure an error was emitted (either by recovering an angle bracket,
                    // or by finding an identifier as the next token), since we're
                    // going to continue parsing
                    assert!(self.sess.span_diagnostic.has_errors());
                } else {
                    return Err(err);
                }
            }
        }
        Ok(a_var)
    }
// `MacBody` + } else if self.check(&token::OpenDelim(token::Paren)) { + let params = self.parse_token_tree(); // `MacParams` + let pspan = params.span(); + if !self.check(&token::OpenDelim(token::Brace)) { + return self.unexpected(); + } + let body = self.parse_token_tree(); // `MacBody` + // Convert `MacParams MacBody` into `{ MacParams => MacBody }`. + let bspan = body.span(); + let arrow = TokenTree::token(token::FatArrow, pspan.between(bspan)); // `=>` + let tokens = TokenStream::new(vec![params.into(), arrow.into(), body.into()]); + let dspan = DelimSpan::from_pair(pspan.shrink_to_lo(), bspan.shrink_to_hi()); + P(MacArgs::Delimited(dspan, MacDelimiter::Brace, tokens)) + } else { + return self.unexpected(); + }; + + self.sess.gated_spans.gate(sym::decl_macro, lo.to(self.prev_token.span)); + Ok((ident, ItemKind::MacroDef(ast::MacroDef { body, macro_rules: false }))) + } + + /// Is this unambiguously the start of a `macro_rules! foo` item defnition? + fn is_macro_rules_item(&mut self) -> bool { + self.check_keyword(kw::MacroRules) + && self.look_ahead(1, |t| *t == token::Not) + && self.look_ahead(2, |t| t.is_ident()) + } + + /// Parses a `macro_rules! foo { ... }` declarative macro. + fn parse_item_macro_rules(&mut self, vis: &Visibility) -> PResult<'a, ItemInfo> { + self.expect_keyword(kw::MacroRules)?; // `macro_rules` + self.expect(&token::Not)?; // `!` + + let ident = self.parse_ident()?; + let body = self.parse_mac_args()?; + self.eat_semi_for_macro_if_needed(&body); + self.complain_if_pub_macro(vis, true); + + Ok((ident, ItemKind::MacroDef(ast::MacroDef { body, macro_rules: true }))) + } + + /// Item macro invocations or `macro_rules!` definitions need inherited visibility. + /// If that's not the case, emit an error. 
+ fn complain_if_pub_macro(&self, vis: &Visibility, macro_rules: bool) { + if let VisibilityKind::Inherited = vis.node { + return; + } + + let vstr = pprust::vis_to_string(vis); + let vstr = vstr.trim_end(); + if macro_rules { + let msg = format!("can't qualify macro_rules invocation with `{}`", vstr); + self.struct_span_err(vis.span, &msg) + .span_suggestion( + vis.span, + "try exporting the macro", + "#[macro_export]".to_owned(), + Applicability::MaybeIncorrect, // speculative + ) + .emit(); + } else { + self.struct_span_err(vis.span, "can't qualify macro invocation with `pub`") + .span_suggestion( + vis.span, + "remove the visibility", + String::new(), + Applicability::MachineApplicable, + ) + .help(&format!("try adjusting the macro to put `{}` inside the invocation", vstr)) + .emit(); + } + } + + fn eat_semi_for_macro_if_needed(&mut self, args: &MacArgs) { + if args.need_semicolon() && !self.eat(&token::Semi) { + self.report_invalid_macro_expansion_item(args); + } + } + + fn report_invalid_macro_expansion_item(&self, args: &MacArgs) { + let span = args.span().expect("undelimited macro call"); + let mut err = self.struct_span_err( + span, + "macros that expand to items must be delimited with braces or followed by a semicolon", + ); + if self.unclosed_delims.is_empty() { + let DelimSpan { open, close } = match args { + MacArgs::Empty | MacArgs::Eq(..) => unreachable!(), + MacArgs::Delimited(dspan, ..) 
    /// Checks if current token is one of tokens which cannot be nested like `kw::Enum`. In case
    /// it is, we try to parse the item and report error about nested types.
    ///
    /// Returns `Ok(false)` when a nested ADT was parsed-and-reported (the caller
    /// must not treat the current position as a field), `Ok(true)` otherwise.
    fn recover_nested_adt_item(&mut self, keyword: Symbol) -> PResult<'a, bool> {
        // Only trigger on `enum`/`struct`/`union` followed by an identifier,
        // i.e. what unambiguously looks like a nested item definition.
        if (self.token.is_keyword(kw::Enum)
            || self.token.is_keyword(kw::Struct)
            || self.token.is_keyword(kw::Union))
            && self.look_ahead(1, |t| t.is_ident())
        {
            let kw_token = self.token.clone();
            let kw_str = pprust::token_to_string(&kw_token);
            // Parse the whole nested item so we can point the suggestion at its span.
            let item = self.parse_item()?;

            self.struct_span_err(
                kw_token.span,
                &format!("`{}` definition cannot be nested inside `{}`", kw_str, keyword),
            )
            .span_suggestion(
                // NOTE(review): assumes `parse_item` returned `Some` here since we
                // just saw an item keyword — `unwrap` would panic otherwise; confirm.
                item.unwrap().span,
                &format!("consider creating a new `{}` definition instead of nesting", kw_str),
                String::new(),
                Applicability::MaybeIncorrect,
            )
            .emit();
            // We successfully parsed the item but we must inform the caller about nested problem.
            return Ok(false);
        }
        Ok(true)
    }
+ fn parse_fn( + &mut self, + attrs: &mut Vec<Attribute>, + req_name: ReqName, + sig_lo: Span, + ) -> PResult<'a, (Ident, FnSig, Generics, Option<P<Block>>)> { + let header = self.parse_fn_front_matter()?; // `const ... fn` + let ident = self.parse_ident()?; // `foo` + let mut generics = self.parse_generics()?; // `<'a, T, ...>` + let decl = self.parse_fn_decl(req_name, AllowPlus::Yes)?; // `(p: u8, ...)` + generics.where_clause = self.parse_where_clause()?; // `where T: Ord` + + let mut sig_hi = self.prev_token.span; + let body = self.parse_fn_body(attrs, &mut sig_hi)?; // `;` or `{ ... }`. + let fn_sig_span = sig_lo.to(sig_hi); + Ok((ident, FnSig { header, decl, span: fn_sig_span }, generics, body)) + } + + /// Parse the "body" of a function. + /// This can either be `;` when there's no body, + /// or e.g. a block when the function is a provided one. + fn parse_fn_body( + &mut self, + attrs: &mut Vec<Attribute>, + sig_hi: &mut Span, + ) -> PResult<'a, Option<P<Block>>> { + let (inner_attrs, body) = if self.check(&token::Semi) { + // Include the trailing semicolon in the span of the signature + *sig_hi = self.token.span; + self.bump(); // `;` + (Vec::new(), None) + } else if self.check(&token::OpenDelim(token::Brace)) || self.token.is_whole_block() { + self.parse_inner_attrs_and_block().map(|(attrs, body)| (attrs, Some(body)))? + } else if self.token.kind == token::Eq { + // Recover `fn foo() = $expr;`. 
    /// Is the current token the start of an `FnHeader` / not a valid parse?
    ///
    /// Note: `check_keyword` also records the keyword in `expected_tokens`,
    /// so this method has a diagnostic side effect even when returning `false`.
    /// Note on shape: each `&&` binds tighter than the surrounding `||`s, so
    /// this is a three-way disjunction — bare `fn`, qualifier(s), `extern ABI fn`.
    pub(super) fn check_fn_front_matter(&mut self) -> bool {
        // We use an over-approximation here.
        // `const const`, `fn const` won't parse, but we're not stepping over other syntax either.
        const QUALS: [Symbol; 4] = [kw::Const, kw::Async, kw::Unsafe, kw::Extern];
        self.check_keyword(kw::Fn) // Definitely an `fn`.
        // `$qual fn` or `$qual $qual`:
        || QUALS.iter().any(|&kw| self.check_keyword(kw))
            && self.look_ahead(1, |t| {
                // ...qualified and then `fn`, e.g. `const fn`.
                t.is_keyword(kw::Fn)
                // Two qualifiers. This is enough. Due `async` we need to check that it's reserved.
                || t.is_non_raw_ident_where(|i| QUALS.contains(&i.name) && i.is_reserved())
            })
        // `extern ABI fn`
        // (any literal-ish token is accepted as the ABI here — over-approximation again)
        || self.check_keyword(kw::Extern)
            && self.look_ahead(1, |t| t.can_begin_literal_maybe_minus())
            && self.look_ahead(2, |t| t.is_keyword(kw::Fn))
    }
"fn" ; + /// ``` + pub(super) fn parse_fn_front_matter(&mut self) -> PResult<'a, FnHeader> { + let constness = self.parse_constness(); + let asyncness = self.parse_asyncness(); + let unsafety = self.parse_unsafety(); + let ext = self.parse_extern()?; + + if let Async::Yes { span, .. } = asyncness { + self.ban_async_in_2015(span); + } + + if !self.eat_keyword(kw::Fn) { + // It is possible for `expect_one_of` to recover given the contents of + // `self.expected_tokens`, therefore, do not use `self.unexpected()` which doesn't + // account for this. + if !self.expect_one_of(&[], &[])? { + unreachable!() + } + } + + Ok(FnHeader { constness, unsafety, asyncness, ext }) + } + + /// We are parsing `async fn`. If we are on Rust 2015, emit an error. + fn ban_async_in_2015(&self, span: Span) { + if span.rust_2015() { + let diag = self.diagnostic(); + struct_span_err!(diag, span, E0670, "`async fn` is not permitted in the 2015 edition") + .span_label(span, "to use `async fn`, switch to Rust 2018") + .help("set `edition = \"2018\"` in `Cargo.toml`") + .note("for more on editions, read https://doc.rust-lang.org/edition-guide") + .emit(); + } + } + + /// Parses the parameter list and result type of a function declaration. + pub(super) fn parse_fn_decl( + &mut self, + req_name: ReqName, + ret_allow_plus: AllowPlus, + ) -> PResult<'a, P<FnDecl>> { + Ok(P(FnDecl { + inputs: self.parse_fn_params(req_name)?, + output: self.parse_ret_ty(ret_allow_plus, RecoverQPath::Yes)?, + })) + } + + /// Parses the parameter list of a function, including the `(` and `)` delimiters. + fn parse_fn_params(&mut self, req_name: ReqName) -> PResult<'a, Vec<Param>> { + let mut first_param = true; + // Parse the arguments, starting out with `self` being allowed... + let (mut params, _) = self.parse_paren_comma_seq(|p| { + let param = p.parse_param_general(req_name, first_param).or_else(|mut e| { + e.emit(); + let lo = p.prev_token.span; + // Skip every token until next possible arg or end. 
+ p.eat_to_tokens(&[&token::Comma, &token::CloseDelim(token::Paren)]); + // Create a placeholder argument for proper arg count (issue #34264). + Ok(dummy_arg(Ident::new(kw::Invalid, lo.to(p.prev_token.span)))) + }); + // ...now that we've parsed the first argument, `self` is no longer allowed. + first_param = false; + param + })?; + // Replace duplicated recovered params with `_` pattern to avoid unnecessary errors. + self.deduplicate_recovered_params_names(&mut params); + Ok(params) + } + + /// Parses a single function parameter. + /// + /// - `self` is syntactically allowed when `first_param` holds. + fn parse_param_general(&mut self, req_name: ReqName, first_param: bool) -> PResult<'a, Param> { + let lo = self.token.span; + let attrs = self.parse_outer_attributes()?; + + // Possibly parse `self`. Recover if we parsed it and it wasn't allowed here. + if let Some(mut param) = self.parse_self_param()? { + param.attrs = attrs.into(); + return if first_param { Ok(param) } else { self.recover_bad_self_param(param) }; + } + + let is_name_required = match self.token.kind { + token::DotDotDot => false, + _ => req_name(self.token.span.edition()), + }; + let (pat, ty) = if is_name_required || self.is_named_param() { + debug!("parse_param_general parse_pat (is_name_required:{})", is_name_required); + + let pat = self.parse_fn_param_pat()?; + if let Err(mut err) = self.expect(&token::Colon) { + return if let Some(ident) = + self.parameter_without_type(&mut err, pat, is_name_required, first_param) + { + err.emit(); + Ok(dummy_arg(ident)) + } else { + Err(err) + }; + } + + self.eat_incorrect_doc_comment_for_param_type(); + (pat, self.parse_ty_for_param()?) 
    /// Returns the parsed optional self parameter and whether a self shortcut was used.
    ///
    /// Returns `Ok(None)` when the tokens at the cursor do not form any of the
    /// recognized `self` shapes (`self`, `mut self`, `&self`, `&'lt mut self`,
    /// `self: Ty`, ...); the caller then parses an ordinary parameter instead.
    fn parse_self_param(&mut self) -> PResult<'a, Option<Param>> {
        // Extract an identifier *after* having confirmed that the token is one.
        let expect_self_ident = |this: &mut Self| match this.token.ident() {
            Some((ident, false)) => {
                this.bump();
                ident
            }
            // Callers only invoke this once lookahead has established an ident.
            _ => unreachable!(),
        };
        // Is `self` `n` tokens ahead?
        // (Rejects `self::` — that is a path, not a self parameter.)
        let is_isolated_self = |this: &Self, n| {
            this.is_keyword_ahead(n, &[kw::SelfLower])
                && this.look_ahead(n + 1, |t| t != &token::ModSep)
        };
        // Is `mut self` `n` tokens ahead?
        let is_isolated_mut_self =
            |this: &Self, n| this.is_keyword_ahead(n, &[kw::Mut]) && is_isolated_self(this, n + 1);
        // Parse `self` or `self: TYPE`. We already know the current token is `self`.
        let parse_self_possibly_typed = |this: &mut Self, m| {
            let eself_ident = expect_self_ident(this);
            let eself_hi = this.prev_token.span;
            let eself = if this.eat(&token::Colon) {
                SelfKind::Explicit(this.parse_ty()?, m)
            } else {
                SelfKind::Value(m)
            };
            Ok((eself, eself_ident, eself_hi))
        };
        // Recover for the grammar `*self`, `*const self`, and `*mut self`.
        let recover_self_ptr = |this: &mut Self| {
            let msg = "cannot pass `self` by raw pointer";
            let span = this.token.span;
            this.struct_span_err(span, msg).span_label(span, msg).emit();

            Ok((SelfKind::Value(Mutability::Not), expect_self_ident(this), this.prev_token.span))
        };

        // Parse optional `self` parameter of a method.
        // Only a limited set of initial token sequences is considered `self` parameters; anything
        // else is parsed as a normal function parameter list, so some lookahead is required.
        let eself_lo = self.token.span;
        let (eself, eself_ident, eself_hi) = match self.token.uninterpolate().kind {
            token::BinOp(token::And) => {
                let eself = if is_isolated_self(self, 1) {
                    // `&self`
                    self.bump();
                    SelfKind::Region(None, Mutability::Not)
                } else if is_isolated_mut_self(self, 1) {
                    // `&mut self`
                    self.bump();
                    self.bump();
                    SelfKind::Region(None, Mutability::Mut)
                } else if self.look_ahead(1, |t| t.is_lifetime()) && is_isolated_self(self, 2) {
                    // `&'lt self`
                    self.bump();
                    let lt = self.expect_lifetime();
                    SelfKind::Region(Some(lt), Mutability::Not)
                } else if self.look_ahead(1, |t| t.is_lifetime()) && is_isolated_mut_self(self, 2) {
                    // `&'lt mut self`
                    self.bump();
                    let lt = self.expect_lifetime();
                    self.bump();
                    SelfKind::Region(Some(lt), Mutability::Mut)
                } else {
                    // `&not_self` — a reference to something else; not a self param.
                    return Ok(None);
                };
                (eself, expect_self_ident(self), self.prev_token.span)
            }
            // `*self`
            token::BinOp(token::Star) if is_isolated_self(self, 1) => {
                self.bump();
                recover_self_ptr(self)?
            }
            // `*mut self` and `*const self`
            token::BinOp(token::Star)
                if self.look_ahead(1, |t| t.is_mutability()) && is_isolated_self(self, 2) =>
            {
                self.bump();
                self.bump();
                recover_self_ptr(self)?
            }
            // `self` and `self: TYPE`
            token::Ident(..) if is_isolated_self(self, 0) => {
                parse_self_possibly_typed(self, Mutability::Not)?
            }
            // `mut self` and `mut self: TYPE`
            token::Ident(..) if is_isolated_mut_self(self, 0) => {
                self.bump();
                parse_self_possibly_typed(self, Mutability::Mut)?
            }
            _ => return Ok(None),
        };

        let eself = source_map::respan(eself_lo.to(eself_hi), eself);
        Ok(Some(Param::from_self(AttrVec::default(), eself, eself_ident)))
    }
/// If the next tokens are ill-formed `$ty::` recover them as `<$ty>::`.
///
/// Must be a macro (not a method) because it `return`s from the *calling*
/// parser function when recovery fires.
#[macro_export]
macro_rules! maybe_recover_from_interpolated_ty_qpath {
    ($self: expr, $allow_qpath_recovery: expr) => {
        // Only fire when the current token is an interpolated type (`$ty` from
        // a macro expansion) immediately followed by `::`.
        if $allow_qpath_recovery && $self.look_ahead(1, |t| t == &token::ModSep) {
            if let token::Interpolated(nt) = &$self.token.kind {
                if let token::NtTy(ty) = &**nt {
                    let ty = ty.clone();
                    $self.bump();
                    // Delegate to the shared qpath recovery, which rewrites
                    // the parse as if the user had written `<$ty>::`.
                    return $self.maybe_recover_from_bad_qpath_stage_2($self.prev_token.span, ty);
                }
            }
        }
    };
}
/// When a `Parser` is discarded, flush any unclosed-delimiter errors that were
/// collected by the lexer but never consumed by error recovery — per the
/// `unclosed_delims` field docs, every entry left at the end is emitted as an
/// independent error.
impl<'a> Drop for Parser<'a> {
    fn drop(&mut self) {
        emit_unclosed_delims(&mut self.unclosed_delims, &self.sess);
    }
}
When we encounter a `TokenTree::Delimited`, + /// we want to record the `TokenTree::Delimited` itself, + /// but *not* any of the inner tokens while we are inside + /// the new frame (this would cause us to record duplicate tokens). + /// + /// This `depth` fields tracks stack depth we are recording tokens. + /// Only tokens encountered at this depth will be recorded. See + /// `TokenCursor::next` for more details. + depth: usize, +} + +impl TokenCursorFrame { + fn new(span: DelimSpan, delim: DelimToken, tts: &TokenStream) -> Self { + TokenCursorFrame { + delim, + span, + open_delim: delim == token::NoDelim, + tree_cursor: tts.clone().into_trees(), + close_delim: delim == token::NoDelim, + } + } +} + +impl TokenCursor { + fn next(&mut self) -> Token { + loop { + let tree = if !self.frame.open_delim { + self.frame.open_delim = true; + TokenTree::open_tt(self.frame.span, self.frame.delim).into() + } else if let Some(tree) = self.frame.tree_cursor.next_with_joint() { + tree + } else if !self.frame.close_delim { + self.frame.close_delim = true; + TokenTree::close_tt(self.frame.span, self.frame.delim).into() + } else if let Some(frame) = self.stack.pop() { + self.frame = frame; + continue; + } else { + return Token::new(token::Eof, DUMMY_SP); + }; + + // Don't set an open delimiter as our current token - we want + // to leave it as the full `TokenTree::Delimited` from the previous + // iteration of this loop + if !matches!(tree.0, TokenTree::Token(Token { kind: TokenKind::OpenDelim(_), .. 
    /// Like `next`, but desugars doc comments: a `DocComment` token is replaced
    /// by the token stream of the equivalent attribute, `#[doc = r#"..."#]`
    /// (or `#![doc = ...]` for inner doc comments), pushed as a new frame, and
    /// the first token of that stream is returned instead.
    fn next_desugared(&mut self) -> Token {
        let (data, attr_style, sp) = match self.next() {
            Token { kind: token::DocComment(_, attr_style, data), span } => {
                (data, attr_style, span)
            }
            // Not a doc comment — pass the token through untouched.
            tok => return tok,
        };

        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
        // required to wrap the text (the doc text becomes a raw string literal,
        // so its delimiter must not occur inside the text itself).
        let mut num_of_hashes = 0;
        let mut count = 0;
        for ch in data.as_str().chars() {
            count = match ch {
                '"' => 1,
                '#' if count > 0 => count + 1,
                _ => 0,
            };
            num_of_hashes = cmp::max(num_of_hashes, count);
        }

        // `[doc = r#"..."#]` — the bracketed part of the attribute.
        let delim_span = DelimSpan::from_single(sp);
        let body = TokenTree::Delimited(
            delim_span,
            token::Bracket,
            [
                TokenTree::token(token::Ident(sym::doc, false), sp),
                TokenTree::token(token::Eq, sp),
                TokenTree::token(TokenKind::lit(token::StrRaw(num_of_hashes), data, None), sp),
            ]
            .iter()
            .cloned()
            .collect::<TokenStream>(),
        );

        // Push the synthesized attribute tokens as a fresh frame so they are
        // yielded before the rest of the original stream resumes.
        self.stack.push(mem::replace(
            &mut self.frame,
            TokenCursorFrame::new(
                delim_span,
                token::NoDelim,
                &if attr_style == AttrStyle::Inner {
                    // Inner doc comment → `#![doc = ...]`.
                    [TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body]
                        .iter()
                        .cloned()
                        .collect::<TokenStream>()
                } else {
                    // Outer doc comment → `#[doc = ...]`.
                    [TokenTree::token(token::Pound, sp), body]
                        .iter()
                        .cloned()
                        .collect::<TokenStream>()
                },
            ),
        ));

        self.next()
    }
+} + +impl TokenType { + fn to_string(&self) -> String { + match *self { + TokenType::Token(ref t) => format!("`{}`", pprust::token_kind_to_string(t)), + TokenType::Keyword(kw) => format!("`{}`", kw), + TokenType::Operator => "an operator".to_string(), + TokenType::Lifetime => "lifetime".to_string(), + TokenType::Ident => "identifier".to_string(), + TokenType::Path => "path".to_string(), + TokenType::Type => "type".to_string(), + TokenType::Const => "const".to_string(), + } + } +} + +#[derive(Copy, Clone, Debug)] +enum TokenExpectType { + Expect, + NoExpect, +} + +/// A sequence separator. +struct SeqSep { + /// The separator token. + sep: Option<TokenKind>, + /// `true` if a trailing separator is allowed. + trailing_sep_allowed: bool, +} + +impl SeqSep { + fn trailing_allowed(t: TokenKind) -> SeqSep { + SeqSep { sep: Some(t), trailing_sep_allowed: true } + } + + fn none() -> SeqSep { + SeqSep { sep: None, trailing_sep_allowed: false } + } +} + +pub enum FollowedByType { + Yes, + No, +} + +fn token_descr_opt(token: &Token) -> Option<&'static str> { + Some(match token.kind { + _ if token.is_special_ident() => "reserved identifier", + _ if token.is_used_keyword() => "keyword", + _ if token.is_unused_keyword() => "reserved keyword", + token::DocComment(..) 
=> "doc comment", + _ => return None, + }) +} + +pub(super) fn token_descr(token: &Token) -> String { + let token_str = pprust::token_to_string(token); + match token_descr_opt(token) { + Some(prefix) => format!("{} `{}`", prefix, token_str), + _ => format!("`{}`", token_str), + } +} + +impl<'a> Parser<'a> { + pub fn new( + sess: &'a ParseSess, + tokens: TokenStream, + desugar_doc_comments: bool, + subparser_name: Option<&'static str>, + ) -> Self { + let mut parser = Parser { + sess, + token: Token::dummy(), + prev_token: Token::dummy(), + restrictions: Restrictions::empty(), + expected_tokens: Vec::new(), + token_cursor: TokenCursor { + frame: TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, &tokens), + stack: Vec::new(), + cur_token: None, + collecting: None, + }, + desugar_doc_comments, + unmatched_angle_bracket_count: 0, + max_angle_bracket_count: 0, + unclosed_delims: Vec::new(), + last_unexpected_token_span: None, + last_type_ascription: None, + subparser_name, + }; + + // Make parser point to the first token. + parser.bump(); + + parser + } + + fn next_tok(&mut self, fallback_span: Span) -> Token { + let mut next = if self.desugar_doc_comments { + self.token_cursor.next_desugared() + } else { + self.token_cursor.next() + }; + if next.span.is_dummy() { + // Tweak the location for better diagnostics, but keep syntactic context intact. + next.span = fallback_span.with_ctxt(next.span.ctxt()); + } + next + } + + crate fn unexpected<T>(&mut self) -> PResult<'a, T> { + match self.expect_one_of(&[], &[]) { + Err(e) => Err(e), + // We can get `Ok(true)` from `recover_closing_delimiter` + // which is called in `expected_one_of_not_found`. + Ok(_) => FatalError.raise(), + } + } + + /// Expects and consumes the token `t`. Signals an error if the next token is not `t`. 
+ pub fn expect(&mut self, t: &TokenKind) -> PResult<'a, bool /* recovered */> { + if self.expected_tokens.is_empty() { + if self.token == *t { + self.bump(); + Ok(false) + } else { + self.unexpected_try_recover(t) + } + } else { + self.expect_one_of(slice::from_ref(t), &[]) + } + } + + /// Expect next token to be edible or inedible token. If edible, + /// then consume it; if inedible, then return without consuming + /// anything. Signal a fatal error if next token is unexpected. + pub fn expect_one_of( + &mut self, + edible: &[TokenKind], + inedible: &[TokenKind], + ) -> PResult<'a, bool /* recovered */> { + if edible.contains(&self.token.kind) { + self.bump(); + Ok(false) + } else if inedible.contains(&self.token.kind) { + // leave it in the input + Ok(false) + } else if self.last_unexpected_token_span == Some(self.token.span) { + FatalError.raise(); + } else { + self.expected_one_of_not_found(edible, inedible) + } + } + + // Public for rustfmt usage. + pub fn parse_ident(&mut self) -> PResult<'a, Ident> { + self.parse_ident_common(true) + } + + fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, Ident> { + match self.token.ident() { + Some((ident, is_raw)) => { + if !is_raw && ident.is_reserved() { + let mut err = self.expected_ident_found(); + if recover { + err.emit(); + } else { + return Err(err); + } + } + self.bump(); + Ok(ident) + } + _ => Err(match self.prev_token.kind { + TokenKind::DocComment(..) => { + self.span_fatal_err(self.prev_token.span, Error::UselessDocComment) + } + _ => self.expected_ident_found(), + }), + } + } + + /// Checks if the next token is `tok`, and returns `true` if so. + /// + /// This method will automatically add `tok` to `expected_tokens` if `tok` is not + /// encountered. + fn check(&mut self, tok: &TokenKind) -> bool { + let is_present = self.token == *tok; + if !is_present { + self.expected_tokens.push(TokenType::Token(tok.clone())); + } + is_present + } + + /// Consumes a token 'tok' if it exists. 
Returns whether the given token was present. + pub fn eat(&mut self, tok: &TokenKind) -> bool { + let is_present = self.check(tok); + if is_present { + self.bump() + } + is_present + } + + /// If the next token is the given keyword, returns `true` without eating it. + /// An expectation is also added for diagnostics purposes. + fn check_keyword(&mut self, kw: Symbol) -> bool { + self.expected_tokens.push(TokenType::Keyword(kw)); + self.token.is_keyword(kw) + } + + /// If the next token is the given keyword, eats it and returns `true`. + /// Otherwise, returns `false`. An expectation is also added for diagnostics purposes. + // Public for rustfmt usage. + pub fn eat_keyword(&mut self, kw: Symbol) -> bool { + if self.check_keyword(kw) { + self.bump(); + true + } else { + false + } + } + + fn eat_keyword_noexpect(&mut self, kw: Symbol) -> bool { + if self.token.is_keyword(kw) { + self.bump(); + true + } else { + false + } + } + + /// If the given word is not a keyword, signals an error. + /// If the next token is not the given word, signals an error. + /// Otherwise, eats it. + fn expect_keyword(&mut self, kw: Symbol) -> PResult<'a, ()> { + if !self.eat_keyword(kw) { self.unexpected() } else { Ok(()) } + } + + /// Is the given keyword `kw` followed by a non-reserved identifier? 
+ fn is_kw_followed_by_ident(&self, kw: Symbol) -> bool { + self.token.is_keyword(kw) && self.look_ahead(1, |t| t.is_ident() && !t.is_reserved_ident()) + } + + fn check_or_expected(&mut self, ok: bool, typ: TokenType) -> bool { + if ok { + true + } else { + self.expected_tokens.push(typ); + false + } + } + + fn check_ident(&mut self) -> bool { + self.check_or_expected(self.token.is_ident(), TokenType::Ident) + } + + fn check_path(&mut self) -> bool { + self.check_or_expected(self.token.is_path_start(), TokenType::Path) + } + + fn check_type(&mut self) -> bool { + self.check_or_expected(self.token.can_begin_type(), TokenType::Type) + } + + fn check_const_arg(&mut self) -> bool { + self.check_or_expected(self.token.can_begin_const_arg(), TokenType::Const) + } + + /// Checks to see if the next token is either `+` or `+=`. + /// Otherwise returns `false`. + fn check_plus(&mut self) -> bool { + self.check_or_expected( + self.token.is_like_plus(), + TokenType::Token(token::BinOp(token::Plus)), + ) + } + + /// Eats the expected token if it's present possibly breaking + /// compound tokens like multi-character operators in process. + /// Returns `true` if the token was eaten. + fn break_and_eat(&mut self, expected: TokenKind) -> bool { + if self.token.kind == expected { + self.bump(); + return true; + } + match self.token.kind.break_two_token_op() { + Some((first, second)) if first == expected => { + let first_span = self.sess.source_map().start_point(self.token.span); + let second_span = self.token.span.with_lo(first_span.hi()); + self.token = Token::new(first, first_span); + self.bump_with(Token::new(second, second_span)); + true + } + _ => { + self.expected_tokens.push(TokenType::Token(expected)); + false + } + } + } + + /// Eats `+` possibly breaking tokens like `+=` in process. + fn eat_plus(&mut self) -> bool { + self.break_and_eat(token::BinOp(token::Plus)) + } + + /// Eats `&` possibly breaking tokens like `&&` in process. 
+ /// Signals an error if `&` is not eaten. + fn expect_and(&mut self) -> PResult<'a, ()> { + if self.break_and_eat(token::BinOp(token::And)) { Ok(()) } else { self.unexpected() } + } + + /// Eats `|` possibly breaking tokens like `||` in process. + /// Signals an error if `|` was not eaten. + fn expect_or(&mut self) -> PResult<'a, ()> { + if self.break_and_eat(token::BinOp(token::Or)) { Ok(()) } else { self.unexpected() } + } + + /// Eats `<` possibly breaking tokens like `<<` in process. + fn eat_lt(&mut self) -> bool { + let ate = self.break_and_eat(token::Lt); + if ate { + // See doc comment for `unmatched_angle_bracket_count`. + self.unmatched_angle_bracket_count += 1; + self.max_angle_bracket_count += 1; + debug!("eat_lt: (increment) count={:?}", self.unmatched_angle_bracket_count); + } + ate + } + + /// Eats `<` possibly breaking tokens like `<<` in process. + /// Signals an error if `<` was not eaten. + fn expect_lt(&mut self) -> PResult<'a, ()> { + if self.eat_lt() { Ok(()) } else { self.unexpected() } + } + + /// Eats `>` possibly breaking tokens like `>>` in process. + /// Signals an error if `>` was not eaten. + fn expect_gt(&mut self) -> PResult<'a, ()> { + if self.break_and_eat(token::Gt) { + // See doc comment for `unmatched_angle_bracket_count`. 
+ if self.unmatched_angle_bracket_count > 0 { + self.unmatched_angle_bracket_count -= 1; + debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count); + } + Ok(()) + } else { + self.unexpected() + } + } + + fn expect_any_with_type(&mut self, kets: &[&TokenKind], expect: TokenExpectType) -> bool { + kets.iter().any(|k| match expect { + TokenExpectType::Expect => self.check(k), + TokenExpectType::NoExpect => self.token == **k, + }) + } + + fn parse_seq_to_before_tokens<T>( + &mut self, + kets: &[&TokenKind], + sep: SeqSep, + expect: TokenExpectType, + mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool /* trailing */, bool /* recovered */)> { + let mut first = true; + let mut recovered = false; + let mut trailing = false; + let mut v = vec![]; + while !self.expect_any_with_type(kets, expect) { + if let token::CloseDelim(..) | token::Eof = self.token.kind { + break; + } + if let Some(ref t) = sep.sep { + if first { + first = false; + } else { + match self.expect(t) { + Ok(false) => {} + Ok(true) => { + recovered = true; + break; + } + Err(mut expect_err) => { + let sp = self.prev_token.span.shrink_to_hi(); + let token_str = pprust::token_kind_to_string(t); + + // Attempt to keep parsing if it was a similar separator. 
+ if let Some(ref tokens) = t.similar_tokens() { + if tokens.contains(&self.token.kind) { + self.bump(); + } + } + + // If this was a missing `@` in a binding pattern + // bail with a suggestion + // https://github.com/rust-lang/rust/issues/72373 + if self.prev_token.is_ident() && self.token.kind == token::DotDot { + let msg = format!( + "if you meant to bind the contents of \ + the rest of the array pattern into `{}`, use `@`", + pprust::token_to_string(&self.prev_token) + ); + expect_err + .span_suggestion_verbose( + self.prev_token.span.shrink_to_hi().until(self.token.span), + &msg, + " @ ".to_string(), + Applicability::MaybeIncorrect, + ) + .emit(); + break; + } + + // Attempt to keep parsing if it was an omitted separator. + match f(self) { + Ok(t) => { + // Parsed successfully, therefore most probably the code only + // misses a separator. + expect_err + .span_suggestion_short( + self.sess.source_map().next_point(sp), + &format!("missing `{}`", token_str), + token_str, + Applicability::MaybeIncorrect, + ) + .emit(); + + v.push(t); + continue; + } + Err(mut e) => { + // Parsing failed, therefore it must be something more serious + // than just a missing separator. + expect_err.emit(); + + e.cancel(); + break; + } + } + } + } + } + } + if sep.trailing_sep_allowed && self.expect_any_with_type(kets, expect) { + trailing = true; + break; + } + + let t = f(self)?; + v.push(t); + } + + Ok((v, trailing, recovered)) + } + + /// Parses a sequence, not including the closing delimiter. The function + /// `f` must consume tokens until reaching the next separator or + /// closing bracket. + fn parse_seq_to_before_end<T>( + &mut self, + ket: &TokenKind, + sep: SeqSep, + f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool, bool)> { + self.parse_seq_to_before_tokens(&[ket], sep, TokenExpectType::Expect, f) + } + + /// Parses a sequence, including the closing delimiter. 
The function + /// `f` must consume tokens until reaching the next separator or + /// closing bracket. + fn parse_seq_to_end<T>( + &mut self, + ket: &TokenKind, + sep: SeqSep, + f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool /* trailing */)> { + let (val, trailing, recovered) = self.parse_seq_to_before_end(ket, sep, f)?; + if !recovered { + self.eat(ket); + } + Ok((val, trailing)) + } + + /// Parses a sequence, including the closing delimiter. The function + /// `f` must consume tokens until reaching the next separator or + /// closing bracket. + fn parse_unspanned_seq<T>( + &mut self, + bra: &TokenKind, + ket: &TokenKind, + sep: SeqSep, + f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool)> { + self.expect(bra)?; + self.parse_seq_to_end(ket, sep, f) + } + + fn parse_delim_comma_seq<T>( + &mut self, + delim: DelimToken, + f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool)> { + self.parse_unspanned_seq( + &token::OpenDelim(delim), + &token::CloseDelim(delim), + SeqSep::trailing_allowed(token::Comma), + f, + ) + } + + fn parse_paren_comma_seq<T>( + &mut self, + f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool)> { + self.parse_delim_comma_seq(token::Paren, f) + } + + /// Advance the parser by one token using provided token as the next one. + fn bump_with(&mut self, next_token: Token) { + // Bumping after EOF is a bad sign, usually an infinite loop. + if self.prev_token.kind == TokenKind::Eof { + let msg = "attempted to bump the parser past EOF (may be stuck in a loop)"; + self.span_bug(self.token.span, msg); + } + + // Update the current and previous tokens. + self.prev_token = mem::replace(&mut self.token, next_token); + + // Diagnostics. + self.expected_tokens.clear(); + } + + /// Advance the parser by one token. 
+ pub fn bump(&mut self) { + let next_token = self.next_tok(self.token.span); + self.bump_with(next_token); + } + + /// Look-ahead `dist` tokens of `self.token` and get access to that token there. + /// When `dist == 0` then the current token is looked at. + pub fn look_ahead<R>(&self, dist: usize, looker: impl FnOnce(&Token) -> R) -> R { + if dist == 0 { + return looker(&self.token); + } + + let frame = &self.token_cursor.frame; + looker(&match frame.tree_cursor.look_ahead(dist - 1) { + Some(tree) => match tree { + TokenTree::Token(token) => token, + TokenTree::Delimited(dspan, delim, _) => { + Token::new(token::OpenDelim(delim), dspan.open) + } + }, + None => Token::new(token::CloseDelim(frame.delim), frame.span.close), + }) + } + + /// Returns whether any of the given keywords are `dist` tokens ahead of the current one. + fn is_keyword_ahead(&self, dist: usize, kws: &[Symbol]) -> bool { + self.look_ahead(dist, |t| kws.iter().any(|&kw| t.is_keyword(kw))) + } + + /// Parses asyncness: `async` or nothing. + fn parse_asyncness(&mut self) -> Async { + if self.eat_keyword(kw::Async) { + let span = self.prev_token.uninterpolated_span(); + Async::Yes { span, closure_id: DUMMY_NODE_ID, return_impl_trait_id: DUMMY_NODE_ID } + } else { + Async::No + } + } + + /// Parses unsafety: `unsafe` or nothing. + fn parse_unsafety(&mut self) -> Unsafe { + if self.eat_keyword(kw::Unsafe) { + Unsafe::Yes(self.prev_token.uninterpolated_span()) + } else { + Unsafe::No + } + } + + /// Parses constness: `const` or nothing. + fn parse_constness(&mut self) -> Const { + if self.eat_keyword(kw::Const) { + Const::Yes(self.prev_token.uninterpolated_span()) + } else { + Const::No + } + } + + /// Parses mutability (`mut` or nothing). + fn parse_mutability(&mut self) -> Mutability { + if self.eat_keyword(kw::Mut) { Mutability::Mut } else { Mutability::Not } + } + + /// Possibly parses mutability (`const` or `mut`). 
+ fn parse_const_or_mut(&mut self) -> Option<Mutability> { + if self.eat_keyword(kw::Mut) { + Some(Mutability::Mut) + } else if self.eat_keyword(kw::Const) { + Some(Mutability::Not) + } else { + None + } + } + + fn parse_field_name(&mut self) -> PResult<'a, Ident> { + if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) = self.token.kind + { + self.expect_no_suffix(self.token.span, "a tuple index", suffix); + self.bump(); + Ok(Ident::new(symbol, self.prev_token.span)) + } else { + self.parse_ident_common(false) + } + } + + fn parse_mac_args(&mut self) -> PResult<'a, P<MacArgs>> { + self.parse_mac_args_common(true).map(P) + } + + fn parse_attr_args(&mut self) -> PResult<'a, MacArgs> { + self.parse_mac_args_common(false) + } + + fn parse_mac_args_common(&mut self, delimited_only: bool) -> PResult<'a, MacArgs> { + Ok( + if self.check(&token::OpenDelim(DelimToken::Paren)) + || self.check(&token::OpenDelim(DelimToken::Bracket)) + || self.check(&token::OpenDelim(DelimToken::Brace)) + { + match self.parse_token_tree() { + TokenTree::Delimited(dspan, delim, tokens) => + // We've confirmed above that there is a delimiter so unwrapping is OK. + { + MacArgs::Delimited(dspan, MacDelimiter::from_token(delim).unwrap(), tokens) + } + _ => unreachable!(), + } + } else if !delimited_only { + if self.eat(&token::Eq) { + let eq_span = self.prev_token.span; + let mut is_interpolated_expr = false; + if let token::Interpolated(nt) = &self.token.kind { + if let token::NtExpr(..) = **nt { + is_interpolated_expr = true; + } + } + let token_tree = if is_interpolated_expr { + // We need to accept arbitrary interpolated expressions to continue + // supporting things like `doc = $expr` that work on stable. + // Non-literal interpolated expressions are rejected after expansion. 
+                        self.parse_token_tree()
+                    } else {
+                        self.parse_unsuffixed_lit()?.token_tree()
+                    };
+
+                    MacArgs::Eq(eq_span, token_tree.into())
+                } else {
+                    MacArgs::Empty
+                }
+            } else {
+                return self.unexpected();
+            },
+        )
+    }
+
+    fn parse_or_use_outer_attributes(
+        &mut self,
+        already_parsed_attrs: Option<AttrVec>,
+    ) -> PResult<'a, AttrVec> {
+        if let Some(attrs) = already_parsed_attrs {
+            Ok(attrs)
+        } else {
+            self.parse_outer_attributes().map(|a| a.into())
+        }
+    }
+
+    /// Parses a single token tree from the input.
+    pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
+        match self.token.kind {
+            token::OpenDelim(..) => {
+                let frame = mem::replace(
+                    &mut self.token_cursor.frame,
+                    self.token_cursor.stack.pop().unwrap(),
+                );
+                self.token = Token::new(TokenKind::CloseDelim(frame.delim), frame.span.close);
+                self.bump();
+                TokenTree::Delimited(frame.span, frame.delim, frame.tree_cursor.stream)
+            }
+            token::CloseDelim(_) | token::Eof => unreachable!(),
+            _ => {
+                self.bump();
+                TokenTree::Token(self.prev_token.clone())
+            }
+        }
+    }
+
+    /// Parses a stream of tokens into a list of `TokenTree`s, up to EOF.
+    pub fn parse_all_token_trees(&mut self) -> PResult<'a, Vec<TokenTree>> {
+        let mut tts = Vec::new();
+        while self.token != token::Eof {
+            tts.push(self.parse_token_tree());
+        }
+        Ok(tts)
+    }
+
+    pub fn parse_tokens(&mut self) -> TokenStream {
+        let mut result = Vec::new();
+        loop {
+            match self.token.kind {
+                token::Eof | token::CloseDelim(..) => break,
+                _ => result.push(self.parse_token_tree().into()),
+            }
+        }
+        TokenStream::new(result)
+    }
+
+    /// Evaluates the closure with restrictions in place.
+    ///
+    /// After the closure is evaluated, restrictions are reset.
+    fn with_res<T>(&mut self, res: Restrictions, f: impl FnOnce(&mut Self) -> T) -> T {
+        let old = self.restrictions;
+        self.restrictions = res;
+        let res = f(self);
+        self.restrictions = old;
+        res
+    }
+
+    fn is_crate_vis(&self) -> bool {
+        self.token.is_keyword(kw::Crate) && self.look_ahead(1, |t| t != &token::ModSep)
+    }
+
+    /// Parses `pub`, `pub(crate)` and `pub(in path)` plus shortcuts `crate` for `pub(crate)`,
+    /// `pub(self)` for `pub(in self)` and `pub(super)` for `pub(in super)`.
+    /// If the following element can't be a tuple (i.e., it's a function definition), then
+    /// it's not a tuple struct field, and the contents within the parentheses aren't valid,
+    /// so emit a proper diagnostic.
+    pub(crate) fn parse_visibility(&mut self, fbt: FollowedByType) -> PResult<'a, Visibility> {
+        maybe_whole!(self, NtVis, |x| x);
+
+        self.expected_tokens.push(TokenType::Keyword(kw::Crate));
+        if self.is_crate_vis() {
+            self.bump(); // `crate`
+            self.sess.gated_spans.gate(sym::crate_visibility_modifier, self.prev_token.span);
+            return Ok(respan(self.prev_token.span, VisibilityKind::Crate(CrateSugar::JustCrate)));
+        }
+
+        if !self.eat_keyword(kw::Pub) {
+            // We need a span for our `Spanned<VisibilityKind>`, but there's inherently no
+            // keyword to grab a span from for inherited visibility; an empty span at the
+            // beginning of the current token would seem to be the "Schelling span".
+            return Ok(respan(self.token.span.shrink_to_lo(), VisibilityKind::Inherited));
+        }
+        let lo = self.prev_token.span;
+
+        if self.check(&token::OpenDelim(token::Paren)) {
+            // We don't `self.bump()` the `(` yet because this might be a struct definition where
+            // `()` or a tuple might be allowed. For example, `struct Struct(pub (), pub (usize));`.
+            // Because of this, we only `bump` the `(` if we're assured it is appropriate to do so
+            // by the following tokens.
+ if self.is_keyword_ahead(1, &[kw::Crate]) && self.look_ahead(2, |t| t != &token::ModSep) + // account for `pub(crate::foo)` + { + // Parse `pub(crate)`. + self.bump(); // `(` + self.bump(); // `crate` + self.expect(&token::CloseDelim(token::Paren))?; // `)` + let vis = VisibilityKind::Crate(CrateSugar::PubCrate); + return Ok(respan(lo.to(self.prev_token.span), vis)); + } else if self.is_keyword_ahead(1, &[kw::In]) { + // Parse `pub(in path)`. + self.bump(); // `(` + self.bump(); // `in` + let path = self.parse_path(PathStyle::Mod)?; // `path` + self.expect(&token::CloseDelim(token::Paren))?; // `)` + let vis = VisibilityKind::Restricted { path: P(path), id: ast::DUMMY_NODE_ID }; + return Ok(respan(lo.to(self.prev_token.span), vis)); + } else if self.look_ahead(2, |t| t == &token::CloseDelim(token::Paren)) + && self.is_keyword_ahead(1, &[kw::Super, kw::SelfLower]) + { + // Parse `pub(self)` or `pub(super)`. + self.bump(); // `(` + let path = self.parse_path(PathStyle::Mod)?; // `super`/`self` + self.expect(&token::CloseDelim(token::Paren))?; // `)` + let vis = VisibilityKind::Restricted { path: P(path), id: ast::DUMMY_NODE_ID }; + return Ok(respan(lo.to(self.prev_token.span), vis)); + } else if let FollowedByType::No = fbt { + // Provide this diagnostic if a type cannot follow; + // in particular, if this is not a tuple struct. + self.recover_incorrect_vis_restriction()?; + // Emit diagnostic, but continue with public visibility. + } + } + + Ok(respan(lo, VisibilityKind::Public)) + } + + /// Recovery for e.g. 
`pub(something) fn ...` or `struct X { pub(something) y: Z }` + fn recover_incorrect_vis_restriction(&mut self) -> PResult<'a, ()> { + self.bump(); // `(` + let path = self.parse_path(PathStyle::Mod)?; + self.expect(&token::CloseDelim(token::Paren))?; // `)` + + let msg = "incorrect visibility restriction"; + let suggestion = r##"some possible visibility restrictions are: +`pub(crate)`: visible only on the current crate +`pub(super)`: visible only in the current module's parent +`pub(in path::to::module)`: visible only on the specified path"##; + + let path_str = pprust::path_to_string(&path); + + struct_span_err!(self.sess.span_diagnostic, path.span, E0704, "{}", msg) + .help(suggestion) + .span_suggestion( + path.span, + &format!("make this visible only to module `{}` with `in`", path_str), + format!("in {}", path_str), + Applicability::MachineApplicable, + ) + .emit(); + + Ok(()) + } + + /// Parses `extern string_literal?`. + fn parse_extern(&mut self) -> PResult<'a, Extern> { + Ok(if self.eat_keyword(kw::Extern) { + Extern::from_abi(self.parse_abi()) + } else { + Extern::None + }) + } + + /// Parses a string literal as an ABI spec. + fn parse_abi(&mut self) -> Option<StrLit> { + match self.parse_str_lit() { + Ok(str_lit) => Some(str_lit), + Err(Some(lit)) => match lit.kind { + ast::LitKind::Err(_) => None, + _ => { + self.struct_span_err(lit.span, "non-string ABI literal") + .span_suggestion( + lit.span, + "specify the ABI with a string literal", + "\"C\"".to_string(), + Applicability::MaybeIncorrect, + ) + .emit(); + None + } + }, + Err(None) => None, + } + } + + /// Records all tokens consumed by the provided callback, + /// including the current token. These tokens are collected + /// into a `TokenStream`, and returned along with the result + /// of the callback. 
+    ///
+    /// Note: If your callback consumes an opening delimiter
+    /// (including the case where you call `collect_tokens`
+    /// when the current token is an opening delimiter),
+    /// you must also consume the corresponding closing delimiter.
+    ///
+    /// That is, you can consume
+    /// `something ([{ }])` or `([{}])`, but not `([{}]`
+    ///
+    /// This restriction shouldn't be an issue in practice,
+    /// since this function is used to record the tokens for
+    /// a parsed AST item, which always has matching delimiters.
+    pub fn collect_tokens<R>(
+        &mut self,
+        f: impl FnOnce(&mut Self) -> PResult<'a, R>,
+    ) -> PResult<'a, (R, TokenStream)> {
+        // Record all tokens we parse when parsing this item.
+        let tokens: Vec<TreeAndJoint> = self.token_cursor.cur_token.clone().into_iter().collect();
+        debug!("collect_tokens: starting with {:?}", tokens);
+
+        // We need special handling for the case where `collect_tokens` is called
+        // on an opening delimiter (e.g. '('). At this point, we have already pushed
+        // a new frame - however, we want to record the original `TokenTree::Delimited`,
+        // for consistency with the case where we start recording one token earlier.
+        // See `TokenCursor::next` to see how `cur_token` is set up.
+        let prev_depth =
+            if matches!(self.token_cursor.cur_token, Some((TokenTree::Delimited(..), _))) {
+                if self.token_cursor.stack.is_empty() {
+                    // There is nothing below us in the stack that
+                    // the function could consume, so the only thing it can legally
+                    // capture is the entire contents of the current frame.
+                    return Ok((f(self)?, TokenStream::new(tokens)));
+                }
+                // We have already recorded the full `TokenTree::Delimited` when we created
+                // our `tokens` vector at the start of this function. We are now inside
+                // a new frame corresponding to the `TokenTree::Delimited` we already recorded.
+                // We don't want to record any of the tokens inside this frame, since they
+                // will be duplicates of the tokens nested inside the `TokenTree::Delimited`.
+                // Therefore, we set our recording depth to the *previous* frame. This allows
+                // us to record a sequence like: `(foo).bar()`: the `(foo)` will be recorded
+                // as our initial `cur_token`, while the `.bar()` will be recorded after we
+                // pop the `(foo)` frame.
+                self.token_cursor.stack.len() - 1
+            } else {
+                self.token_cursor.stack.len()
+            };
+        let prev_collecting =
+            self.token_cursor.collecting.replace(Collecting { buf: tokens, depth: prev_depth });
+
+        let ret = f(self);
+
+        let mut collected_tokens = if let Some(collecting) = self.token_cursor.collecting.take() {
+            collecting.buf
+        } else {
+            let msg = "our vector went away?";
+            debug!("collect_tokens: {}", msg);
+            self.sess.span_diagnostic.delay_span_bug(self.token.span, &msg);
+            // This can happen due to a bad interaction of two unrelated recovery mechanisms
+            // with mismatched delimiters *and* recovery lookahead on the likely typo
+            // `pub ident(` (#62895, different but similar to the case above).
+            return Ok((ret?, TokenStream::default()));
+        };
+
+        debug!("collect_tokens: got raw tokens {:?}", collected_tokens);
+
+        // If we're not at EOF our current token wasn't actually consumed by
+        // `f`, but it'll still be in our list that we pulled out. In that case
+        // put it back.
+        let extra_token = if self.token != token::Eof { collected_tokens.pop() } else { None };
+
+        if let Some(mut collecting) = prev_collecting {
+            // If we were previously collecting at the same depth,
+            // then the previous call to `collect_tokens` needs to see
+            // the tokens we just recorded.
+            //
+            // If we were previously recording at a lower `depth`,
+            // then the previous `collect_tokens` call already recorded
+            // this entire frame in the form of a `TokenTree::Delimited`,
+            // so there is nothing else for us to do.
+ if collecting.depth == prev_depth { + collecting.buf.extend(collected_tokens.iter().cloned()); + collecting.buf.extend(extra_token); + debug!("collect_tokens: updating previous buf to {:?}", collecting); + } + self.token_cursor.collecting = Some(collecting) + } + + Ok((ret?, TokenStream::new(collected_tokens))) + } + + /// `::{` or `::*` + fn is_import_coupler(&mut self) -> bool { + self.check(&token::ModSep) + && self.look_ahead(1, |t| { + *t == token::OpenDelim(token::Brace) || *t == token::BinOp(token::Star) + }) + } +} + +crate fn make_unclosed_delims_error( + unmatched: UnmatchedBrace, + sess: &ParseSess, +) -> Option<DiagnosticBuilder<'_>> { + // `None` here means an `Eof` was found. We already emit those errors elsewhere, we add them to + // `unmatched_braces` only for error recovery in the `Parser`. + let found_delim = unmatched.found_delim?; + let mut err = sess.span_diagnostic.struct_span_err( + unmatched.found_span, + &format!( + "mismatched closing delimiter: `{}`", + pprust::token_kind_to_string(&token::CloseDelim(found_delim)), + ), + ); + err.span_label(unmatched.found_span, "mismatched closing delimiter"); + if let Some(sp) = unmatched.candidate_span { + err.span_label(sp, "closing delimiter possibly meant for this"); + } + if let Some(sp) = unmatched.unclosed_span { + err.span_label(sp, "unclosed delimiter"); + } + Some(err) +} + +pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &ParseSess) { + *sess.reached_eof.borrow_mut() |= + unclosed_delims.iter().any(|unmatched_delim| unmatched_delim.found_delim.is_none()); + for unmatched in unclosed_delims.drain(..) 
{ + if let Some(mut e) = make_unclosed_delims_error(unmatched, sess) { + e.emit(); + } + } +} diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs new file mode 100644 index 00000000000..f40cd1131d2 --- /dev/null +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -0,0 +1,170 @@ +use rustc_ast::ptr::P; +use rustc_ast::token::{self, Nonterminal, NonterminalKind, Token}; +use rustc_ast_pretty::pprust; +use rustc_errors::PResult; +use rustc_span::symbol::{kw, Ident}; + +use crate::parser::{FollowedByType, Parser, PathStyle}; + +impl<'a> Parser<'a> { + /// Checks whether a non-terminal may begin with a particular token. + /// + /// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that + /// token. Be conservative (return true) if not sure. + pub fn nonterminal_may_begin_with(kind: NonterminalKind, token: &Token) -> bool { + /// Checks whether the non-terminal may contain a single (non-keyword) identifier. + fn may_be_ident(nt: &token::Nonterminal) -> bool { + match *nt { + token::NtItem(_) | token::NtBlock(_) | token::NtVis(_) | token::NtLifetime(_) => { + false + } + _ => true, + } + } + + match kind { + NonterminalKind::Expr => { + token.can_begin_expr() + // This exception is here for backwards compatibility. + && !token.is_keyword(kw::Let) + } + NonterminalKind::Ty => token.can_begin_type(), + NonterminalKind::Ident => get_macro_ident(token).is_some(), + NonterminalKind::Literal => token.can_begin_literal_maybe_minus(), + NonterminalKind::Vis => match token.kind { + // The follow-set of :vis + "priv" keyword + interpolated + token::Comma | token::Ident(..) | token::Interpolated(..) => true, + _ => token.can_begin_type(), + }, + NonterminalKind::Block => match token.kind { + token::OpenDelim(token::Brace) => true, + token::Interpolated(ref nt) => match **nt { + token::NtItem(_) + | token::NtPat(_) + | token::NtTy(_) + | token::NtIdent(..) 
+ | token::NtMeta(_) + | token::NtPath(_) + | token::NtVis(_) => false, // none of these may start with '{'. + _ => true, + }, + _ => false, + }, + NonterminalKind::Path | NonterminalKind::Meta => match token.kind { + token::ModSep | token::Ident(..) => true, + token::Interpolated(ref nt) => match **nt { + token::NtPath(_) | token::NtMeta(_) => true, + _ => may_be_ident(&nt), + }, + _ => false, + }, + NonterminalKind::Pat => match token.kind { + token::Ident(..) | // box, ref, mut, and other identifiers (can stricten) + token::OpenDelim(token::Paren) | // tuple pattern + token::OpenDelim(token::Bracket) | // slice pattern + token::BinOp(token::And) | // reference + token::BinOp(token::Minus) | // negative literal + token::AndAnd | // double reference + token::Literal(..) | // literal + token::DotDot | // range pattern (future compat) + token::DotDotDot | // range pattern (future compat) + token::ModSep | // path + token::Lt | // path (UFCS constant) + token::BinOp(token::Shl) => true, // path (double UFCS) + token::Interpolated(ref nt) => may_be_ident(nt), + _ => false, + }, + NonterminalKind::Lifetime => match token.kind { + token::Lifetime(_) => true, + token::Interpolated(ref nt) => match **nt { + token::NtLifetime(_) | token::NtTT(_) => true, + _ => false, + }, + _ => false, + }, + NonterminalKind::TT | NonterminalKind::Item | NonterminalKind::Stmt => match token.kind + { + token::CloseDelim(_) => false, + _ => true, + }, + } + } + + pub fn parse_nonterminal(&mut self, kind: NonterminalKind) -> PResult<'a, Nonterminal> { + // Any `Nonterminal` which stores its tokens (currently `NtItem` and `NtExpr`) + // needs to have them force-captured here. + // A `macro_rules!` invocation may pass a captured item/expr to a proc-macro, + // which requires having captured tokens available. Since we cannot determine + // in advance whether or not a proc-macro will be (transitively) invoked, + // we always capture tokens for any `Nonterminal` which needs them. 
+        Ok(match kind {
+            NonterminalKind::Item => match self.collect_tokens(|this| this.parse_item())? {
+                (Some(mut item), tokens) => {
+                    // If we captured tokens during parsing (due to outer attributes),
+                    // use those.
+                    if item.tokens.is_none() {
+                        item.tokens = Some(tokens);
+                    }
+                    token::NtItem(item)
+                }
+                (None, _) => {
+                    return Err(self.struct_span_err(self.token.span, "expected an item keyword"));
+                }
+            },
+            NonterminalKind::Block => token::NtBlock(self.parse_block()?),
+            NonterminalKind::Stmt => match self.parse_stmt()? {
+                Some(s) => token::NtStmt(s),
+                None => return Err(self.struct_span_err(self.token.span, "expected a statement")),
+            },
+            NonterminalKind::Pat => {
+                let (mut pat, tokens) = self.collect_tokens(|this| this.parse_pat(None))?;
+                // We have eaten an NtPat, which could already have tokens
+                if pat.tokens.is_none() {
+                    pat.tokens = Some(tokens);
+                }
+                token::NtPat(pat)
+            }
+            NonterminalKind::Expr => {
+                let (mut expr, tokens) = self.collect_tokens(|this| this.parse_expr())?;
+                // If we captured tokens during parsing (due to outer attributes),
+                // use those.
+ if expr.tokens.is_none() { + expr.tokens = Some(tokens); + } + token::NtExpr(expr) + } + NonterminalKind::Literal => token::NtLiteral(self.parse_literal_maybe_minus()?), + NonterminalKind::Ty => token::NtTy(self.parse_ty()?), + // this could be handled like a token, since it is one + NonterminalKind::Ident => { + if let Some((ident, is_raw)) = get_macro_ident(&self.token) { + self.bump(); + token::NtIdent(ident, is_raw) + } else { + let token_str = pprust::token_to_string(&self.token); + let msg = &format!("expected ident, found {}", &token_str); + return Err(self.struct_span_err(self.token.span, msg)); + } + } + NonterminalKind::Path => token::NtPath(self.parse_path(PathStyle::Type)?), + NonterminalKind::Meta => token::NtMeta(P(self.parse_attr_item()?)), + NonterminalKind::TT => token::NtTT(self.parse_token_tree()), + NonterminalKind::Vis => token::NtVis(self.parse_visibility(FollowedByType::Yes)?), + NonterminalKind::Lifetime => { + if self.check_lifetime() { + token::NtLifetime(self.expect_lifetime().ident) + } else { + let token_str = pprust::token_to_string(&self.token); + let msg = &format!("expected a lifetime, found `{}`", &token_str); + return Err(self.struct_span_err(self.token.span, msg)); + } + } + }) + } +} + +/// The token is an identifier, but not `_`. +/// We prohibit passing `_` to macros expecting `ident` for now. 
+fn get_macro_ident(token: &Token) -> Option<(Ident, bool)> { + token.ident().filter(|(ident, _)| ident.name != kw::Underscore) +} diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs new file mode 100644 index 00000000000..2c0133a24dc --- /dev/null +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -0,0 +1,1012 @@ +use super::{Parser, PathStyle}; +use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole}; +use rustc_ast::mut_visit::{noop_visit_mac, noop_visit_pat, MutVisitor}; +use rustc_ast::ptr::P; +use rustc_ast::token; +use rustc_ast::{self as ast, AttrVec, Attribute, FieldPat, MacCall, Pat, PatKind, RangeEnd}; +use rustc_ast::{BindingMode, Expr, ExprKind, Mutability, Path, QSelf, RangeSyntax}; +use rustc_ast_pretty::pprust; +use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, PResult}; +use rustc_span::source_map::{respan, Span, Spanned}; +use rustc_span::symbol::{kw, sym, Ident}; + +type Expected = Option<&'static str>; + +/// `Expected` for function and lambda parameter patterns. +pub(super) const PARAM_EXPECTED: Expected = Some("parameter name"); + +const WHILE_PARSING_OR_MSG: &str = "while parsing this or-pattern starting here"; + +/// Whether or not an or-pattern should be gated when occurring in the current context. +#[derive(PartialEq)] +pub(super) enum GateOr { + Yes, + No, +} + +/// Whether or not to recover a `,` when parsing or-patterns. +#[derive(PartialEq, Copy, Clone)] +enum RecoverComma { + Yes, + No, +} + +impl<'a> Parser<'a> { + /// Parses a pattern. + /// + /// Corresponds to `pat<no_top_alt>` in RFC 2535 and does not admit or-patterns + /// at the top level. Used when parsing the parameters of lambda expressions, + /// functions, function pointers, and `pat` macro fragments. + pub fn parse_pat(&mut self, expected: Expected) -> PResult<'a, P<Pat>> { + self.parse_pat_with_range_pat(true, expected) + } + + /// Entry point to the main pattern parser. 
+ /// Corresponds to `top_pat` in RFC 2535 and allows or-pattern at the top level. + pub(super) fn parse_top_pat(&mut self, gate_or: GateOr) -> PResult<'a, P<Pat>> { + // Allow a '|' before the pats (RFCs 1925, 2530, and 2535). + let gated_leading_vert = self.eat_or_separator(None) && gate_or == GateOr::Yes; + let leading_vert_span = self.prev_token.span; + + // Parse the possibly-or-pattern. + let pat = self.parse_pat_with_or(None, gate_or, RecoverComma::Yes)?; + + // If we parsed a leading `|` which should be gated, + // and no other gated or-pattern has been parsed thus far, + // then we should really gate the leading `|`. + // This complicated procedure is done purely for diagnostics UX. + if gated_leading_vert && self.sess.gated_spans.is_ungated(sym::or_patterns) { + self.sess.gated_spans.gate(sym::or_patterns, leading_vert_span); + } + + Ok(pat) + } + + /// Parse the pattern for a function or function pointer parameter. + /// Special recovery is provided for or-patterns and leading `|`. + pub(super) fn parse_fn_param_pat(&mut self) -> PResult<'a, P<Pat>> { + self.recover_leading_vert(None, "not allowed in a parameter pattern"); + let pat = self.parse_pat_with_or(PARAM_EXPECTED, GateOr::No, RecoverComma::No)?; + + if let PatKind::Or(..) = &pat.kind { + self.ban_illegal_fn_param_or_pat(&pat); + } + + Ok(pat) + } + + /// Ban `A | B` immediately in a parameter pattern and suggest wrapping in parens. + fn ban_illegal_fn_param_or_pat(&self, pat: &Pat) { + let msg = "wrap the pattern in parenthesis"; + let fix = format!("({})", pprust::pat_to_string(pat)); + self.struct_span_err(pat.span, "an or-pattern parameter must be wrapped in parenthesis") + .span_suggestion(pat.span, msg, fix, Applicability::MachineApplicable) + .emit(); + } + + /// Parses a pattern, that may be a or-pattern (e.g. `Foo | Bar` in `Some(Foo | Bar)`). + /// Corresponds to `pat<allow_top_alt>` in RFC 2535. 
+ fn parse_pat_with_or( + &mut self, + expected: Expected, + gate_or: GateOr, + rc: RecoverComma, + ) -> PResult<'a, P<Pat>> { + // Parse the first pattern (`p_0`). + let first_pat = self.parse_pat(expected)?; + self.maybe_recover_unexpected_comma(first_pat.span, rc)?; + + // If the next token is not a `|`, + // this is not an or-pattern and we should exit here. + if !self.check(&token::BinOp(token::Or)) && self.token != token::OrOr { + return Ok(first_pat); + } + + // Parse the patterns `p_1 | ... | p_n` where `n > 0`. + let lo = first_pat.span; + let mut pats = vec![first_pat]; + while self.eat_or_separator(Some(lo)) { + let pat = self.parse_pat(expected).map_err(|mut err| { + err.span_label(lo, WHILE_PARSING_OR_MSG); + err + })?; + self.maybe_recover_unexpected_comma(pat.span, rc)?; + pats.push(pat); + } + let or_pattern_span = lo.to(self.prev_token.span); + + // Feature gate the or-pattern if instructed: + if gate_or == GateOr::Yes { + self.sess.gated_spans.gate(sym::or_patterns, or_pattern_span); + } + + Ok(self.mk_pat(or_pattern_span, PatKind::Or(pats))) + } + + /// Eat the or-pattern `|` separator. + /// If instead a `||` token is encountered, recover and pretend we parsed `|`. + fn eat_or_separator(&mut self, lo: Option<Span>) -> bool { + if self.recover_trailing_vert(lo) { + return false; + } + + match self.token.kind { + token::OrOr => { + // Found `||`; Recover and pretend we parsed `|`. + self.ban_unexpected_or_or(lo); + self.bump(); + true + } + _ => self.eat(&token::BinOp(token::Or)), + } + } + + /// Recover if `|` or `||` is the current token and we have one of the + /// tokens `=>`, `if`, `=`, `:`, `;`, `,`, `]`, `)`, or `}` ahead of us. + /// + /// These tokens all indicate that we reached the end of the or-pattern + /// list and can now reliably say that the `|` was an illegal trailing vert. + /// Note that there are more tokens such as `@` for which we know that the `|` + /// is an illegal parse. 
However, the user's intent is less clear in that case. + fn recover_trailing_vert(&mut self, lo: Option<Span>) -> bool { + let is_end_ahead = self.look_ahead(1, |token| match &token.uninterpolate().kind { + token::FatArrow // e.g. `a | => 0,`. + | token::Ident(kw::If, false) // e.g. `a | if expr`. + | token::Eq // e.g. `let a | = 0`. + | token::Semi // e.g. `let a |;`. + | token::Colon // e.g. `let a | :`. + | token::Comma // e.g. `let (a |,)`. + | token::CloseDelim(token::Bracket) // e.g. `let [a | ]`. + | token::CloseDelim(token::Paren) // e.g. `let (a | )`. + | token::CloseDelim(token::Brace) => true, // e.g. `let A { f: a | }`. + _ => false, + }); + match (is_end_ahead, &self.token.kind) { + (true, token::BinOp(token::Or) | token::OrOr) => { + self.ban_illegal_vert(lo, "trailing", "not allowed in an or-pattern"); + self.bump(); + true + } + _ => false, + } + } + + /// We have parsed `||` instead of `|`. Error and suggest `|` instead. + fn ban_unexpected_or_or(&mut self, lo: Option<Span>) { + let mut err = self.struct_span_err(self.token.span, "unexpected token `||` after pattern"); + err.span_suggestion( + self.token.span, + "use a single `|` to separate multiple alternative patterns", + "|".to_owned(), + Applicability::MachineApplicable, + ); + if let Some(lo) = lo { + err.span_label(lo, WHILE_PARSING_OR_MSG); + } + err.emit(); + } + + /// Some special error handling for the "top-level" patterns in a match arm, + /// `for` loop, `let`, &c. (in contrast to subpatterns within such). + fn maybe_recover_unexpected_comma(&mut self, lo: Span, rc: RecoverComma) -> PResult<'a, ()> { + if rc == RecoverComma::No || self.token != token::Comma { + return Ok(()); + } + + // An unexpected comma after a top-level pattern is a clue that the + // user (perhaps more accustomed to some other language) forgot the + // parentheses in what should have been a tuple pattern; return a + // suggestion-enhanced error here rather than choking on the comma later. 
+ let comma_span = self.token.span;
+ self.bump();
+ if let Err(mut err) = self.skip_pat_list() {
+ // We didn't expect this to work anyway; we just wanted to advance to the
+ // end of the comma-sequence so we know the span to suggest parenthesizing.
+ err.cancel();
+ }
+ let seq_span = lo.to(self.prev_token.span);
+ let mut err = self.struct_span_err(comma_span, "unexpected `,` in pattern");
+ if let Ok(seq_snippet) = self.span_to_snippet(seq_span) {
+ err.span_suggestion(
+ seq_span,
+ "try adding parentheses to match on a tuple...",
+ format!("({})", seq_snippet),
+ Applicability::MachineApplicable,
+ )
+ .span_suggestion(
+ seq_span,
+ "...or a vertical bar to match on multiple alternatives",
+ seq_snippet.replace(",", " |"),
+ Applicability::MachineApplicable,
+ );
+ }
+ Err(err)
+ }
+
+ /// Parse and throw away a parenthesized comma-separated
+ /// sequence of patterns until `)` is reached.
+ fn skip_pat_list(&mut self) -> PResult<'a, ()> {
+ while !self.check(&token::CloseDelim(token::Paren)) {
+ self.parse_pat(None)?;
+ if !self.eat(&token::Comma) {
+ return Ok(());
+ }
+ }
+ Ok(())
+ }
+
+ /// Recursive possibly-or-pattern parser with recovery for an erroneous leading `|`.
+ /// See `parse_pat_with_or` for details on parsing or-patterns.
+ fn parse_pat_with_or_inner(&mut self) -> PResult<'a, P<Pat>> {
+ self.recover_leading_vert(None, "only allowed in a top-level pattern");
+ self.parse_pat_with_or(None, GateOr::Yes, RecoverComma::No)
+ }
+
+ /// Recover if `|` or `||` is here.
+ /// The user is thinking that a leading `|` is allowed in this position.
+ fn recover_leading_vert(&mut self, lo: Option<Span>, ctx: &str) {
+ if let token::BinOp(token::Or) | token::OrOr = self.token.kind {
+ self.ban_illegal_vert(lo, "leading", ctx);
+ self.bump();
+ }
+ }
+
+ /// A `|` or possibly `||` token shouldn't be here. Ban it.
+ fn ban_illegal_vert(&mut self, lo: Option<Span>, pos: &str, ctx: &str) { + let span = self.token.span; + let mut err = self.struct_span_err(span, &format!("a {} `|` is {}", pos, ctx)); + err.span_suggestion( + span, + &format!("remove the `{}`", pprust::token_to_string(&self.token)), + String::new(), + Applicability::MachineApplicable, + ); + if let Some(lo) = lo { + err.span_label(lo, WHILE_PARSING_OR_MSG); + } + if let token::OrOr = self.token.kind { + err.note("alternatives in or-patterns are separated with `|`, not `||`"); + } + err.emit(); + } + + /// Parses a pattern, with a setting whether modern range patterns (e.g., `a..=b`, `a..b` are + /// allowed). + fn parse_pat_with_range_pat( + &mut self, + allow_range_pat: bool, + expected: Expected, + ) -> PResult<'a, P<Pat>> { + maybe_recover_from_interpolated_ty_qpath!(self, true); + maybe_whole!(self, NtPat, |x| x); + + let lo = self.token.span; + + let pat = if self.check(&token::BinOp(token::And)) || self.token.kind == token::AndAnd { + self.parse_pat_deref(expected)? + } else if self.check(&token::OpenDelim(token::Paren)) { + self.parse_pat_tuple_or_parens()? + } else if self.check(&token::OpenDelim(token::Bracket)) { + // Parse `[pat, pat,...]` as a slice pattern. + let (pats, _) = + self.parse_delim_comma_seq(token::Bracket, |p| p.parse_pat_with_or_inner())?; + PatKind::Slice(pats) + } else if self.check(&token::DotDot) && !self.is_pat_range_end_start(1) { + // A rest pattern `..`. + self.bump(); // `..` + PatKind::Rest + } else if self.check(&token::DotDotDot) && !self.is_pat_range_end_start(1) { + self.recover_dotdotdot_rest_pat(lo) + } else if let Some(form) = self.parse_range_end() { + self.parse_pat_range_to(form)? // `..=X`, `...X`, or `..X`. + } else if self.eat_keyword(kw::Underscore) { + // Parse _ + PatKind::Wild + } else if self.eat_keyword(kw::Mut) { + self.parse_pat_ident_mut()? 
+ } else if self.eat_keyword(kw::Ref) { + // Parse ref ident @ pat / ref mut ident @ pat + let mutbl = self.parse_mutability(); + self.parse_pat_ident(BindingMode::ByRef(mutbl))? + } else if self.eat_keyword(kw::Box) { + // Parse `box pat` + let pat = self.parse_pat_with_range_pat(false, None)?; + self.sess.gated_spans.gate(sym::box_patterns, lo.to(self.prev_token.span)); + PatKind::Box(pat) + } else if self.can_be_ident_pat() { + // Parse `ident @ pat` + // This can give false positives and parse nullary enums, + // they are dealt with later in resolve. + self.parse_pat_ident(BindingMode::ByValue(Mutability::Not))? + } else if self.is_start_of_pat_with_path() { + // Parse pattern starting with a path + let (qself, path) = if self.eat_lt() { + // Parse a qualified path + let (qself, path) = self.parse_qpath(PathStyle::Expr)?; + (Some(qself), path) + } else { + // Parse an unqualified path + (None, self.parse_path(PathStyle::Expr)?) + }; + let span = lo.to(self.prev_token.span); + + if qself.is_none() && self.check(&token::Not) { + self.parse_pat_mac_invoc(path)? + } else if let Some(form) = self.parse_range_end() { + let begin = self.mk_expr(span, ExprKind::Path(qself, path), AttrVec::new()); + self.parse_pat_range_begin_with(begin, form)? + } else if self.check(&token::OpenDelim(token::Brace)) { + self.parse_pat_struct(qself, path)? + } else if self.check(&token::OpenDelim(token::Paren)) { + self.parse_pat_tuple_struct(qself, path)? 
+ } else { + PatKind::Path(qself, path) + } + } else { + // Try to parse everything else as literal with optional minus + match self.parse_literal_maybe_minus() { + Ok(begin) => match self.parse_range_end() { + Some(form) => self.parse_pat_range_begin_with(begin, form)?, + None => PatKind::Lit(begin), + }, + Err(err) => return self.fatal_unexpected_non_pat(err, expected), + } + }; + + let pat = self.mk_pat(lo.to(self.prev_token.span), pat); + let pat = self.maybe_recover_from_bad_qpath(pat, true)?; + let pat = self.recover_intersection_pat(pat)?; + + if !allow_range_pat { + self.ban_pat_range_if_ambiguous(&pat) + } + + Ok(pat) + } + + /// Recover from a typoed `...` pattern that was encountered + /// Ref: Issue #70388 + fn recover_dotdotdot_rest_pat(&mut self, lo: Span) -> PatKind { + // A typoed rest pattern `...`. + self.bump(); // `...` + + // The user probably mistook `...` for a rest pattern `..`. + self.struct_span_err(lo, "unexpected `...`") + .span_label(lo, "not a valid pattern") + .span_suggestion_short( + lo, + "for a rest pattern, use `..` instead of `...`", + "..".to_owned(), + Applicability::MachineApplicable, + ) + .emit(); + PatKind::Rest + } + + /// Try to recover the more general form `intersect ::= $pat_lhs @ $pat_rhs`. + /// + /// Allowed binding patterns generated by `binding ::= ref? mut? $ident @ $pat_rhs` + /// should already have been parsed by now at this point, + /// if the next token is `@` then we can try to parse the more general form. + /// + /// Consult `parse_pat_ident` for the `binding` grammar. + /// + /// The notion of intersection patterns are found in + /// e.g. [F#][and] where they are called AND-patterns. + /// + /// [and]: https://docs.microsoft.com/en-us/dotnet/fsharp/language-reference/pattern-matching + fn recover_intersection_pat(&mut self, lhs: P<Pat>) -> PResult<'a, P<Pat>> { + if self.token.kind != token::At { + // Next token is not `@` so it's not going to be an intersection pattern. 
+ return Ok(lhs); + } + + // At this point we attempt to parse `@ $pat_rhs` and emit an error. + self.bump(); // `@` + let mut rhs = self.parse_pat(None)?; + let sp = lhs.span.to(rhs.span); + + if let PatKind::Ident(_, _, ref mut sub @ None) = rhs.kind { + // The user inverted the order, so help them fix that. + let mut applicability = Applicability::MachineApplicable; + // FIXME(bindings_after_at): Remove this code when stabilizing the feature. + lhs.walk(&mut |p| match p.kind { + // `check_match` is unhappy if the subpattern has a binding anywhere. + PatKind::Ident(..) => { + applicability = Applicability::MaybeIncorrect; + false // Short-circuit. + } + _ => true, + }); + + let lhs_span = lhs.span; + // Move the LHS into the RHS as a subpattern. + // The RHS is now the full pattern. + *sub = Some(lhs); + + self.struct_span_err(sp, "pattern on wrong side of `@`") + .span_label(lhs_span, "pattern on the left, should be on the right") + .span_label(rhs.span, "binding on the right, should be on the left") + .span_suggestion(sp, "switch the order", pprust::pat_to_string(&rhs), applicability) + .emit(); + } else { + // The special case above doesn't apply so we may have e.g. `A(x) @ B(y)`. + rhs.kind = PatKind::Wild; + self.struct_span_err(sp, "left-hand side of `@` must be a binding") + .span_label(lhs.span, "interpreted as a pattern, not a binding") + .span_label(rhs.span, "also a pattern") + .note("bindings are `x`, `mut x`, `ref x`, and `ref mut x`") + .emit(); + } + + rhs.span = sp; + Ok(rhs) + } + + /// Ban a range pattern if it has an ambiguous interpretation. + fn ban_pat_range_if_ambiguous(&self, pat: &Pat) { + match pat.kind { + PatKind::Range( + .., + Spanned { node: RangeEnd::Included(RangeSyntax::DotDotDot), .. }, + ) => return, + PatKind::Range(..) 
=> {} + _ => return, + } + + self.struct_span_err(pat.span, "the range pattern here has ambiguous interpretation") + .span_suggestion( + pat.span, + "add parentheses to clarify the precedence", + format!("({})", pprust::pat_to_string(&pat)), + // "ambiguous interpretation" implies that we have to be guessing + Applicability::MaybeIncorrect, + ) + .emit(); + } + + /// Parse `&pat` / `&mut pat`. + fn parse_pat_deref(&mut self, expected: Expected) -> PResult<'a, PatKind> { + self.expect_and()?; + self.recover_lifetime_in_deref_pat(); + let mutbl = self.parse_mutability(); + let subpat = self.parse_pat_with_range_pat(false, expected)?; + Ok(PatKind::Ref(subpat, mutbl)) + } + + fn recover_lifetime_in_deref_pat(&mut self) { + if let token::Lifetime(name) = self.token.kind { + self.bump(); // `'a` + + let span = self.prev_token.span; + self.struct_span_err(span, &format!("unexpected lifetime `{}` in pattern", name)) + .span_suggestion( + span, + "remove the lifetime", + String::new(), + Applicability::MachineApplicable, + ) + .emit(); + } + } + + /// Parse a tuple or parenthesis pattern. + fn parse_pat_tuple_or_parens(&mut self) -> PResult<'a, PatKind> { + let (fields, trailing_comma) = + self.parse_paren_comma_seq(|p| p.parse_pat_with_or_inner())?; + + // Here, `(pat,)` is a tuple pattern. + // For backward compatibility, `(..)` is a tuple pattern as well. + Ok(if fields.len() == 1 && !(trailing_comma || fields[0].is_rest()) { + PatKind::Paren(fields.into_iter().next().unwrap()) + } else { + PatKind::Tuple(fields) + }) + } + + /// Parse a mutable binding with the `mut` token already eaten. + fn parse_pat_ident_mut(&mut self) -> PResult<'a, PatKind> { + let mut_span = self.prev_token.span; + + if self.eat_keyword(kw::Ref) { + return self.recover_mut_ref_ident(mut_span); + } + + self.recover_additional_muts(); + + // Make sure we don't allow e.g. `let mut $p;` where `$p:pat`. 
+ if let token::Interpolated(ref nt) = self.token.kind { + if let token::NtPat(_) = **nt { + self.expected_ident_found().emit(); + } + } + + // Parse the pattern we hope to be an identifier. + let mut pat = self.parse_pat(Some("identifier"))?; + + // If we don't have `mut $ident (@ pat)?`, error. + if let PatKind::Ident(BindingMode::ByValue(m @ Mutability::Not), ..) = &mut pat.kind { + // Don't recurse into the subpattern. + // `mut` on the outer binding doesn't affect the inner bindings. + *m = Mutability::Mut; + } else { + // Add `mut` to any binding in the parsed pattern. + let changed_any_binding = Self::make_all_value_bindings_mutable(&mut pat); + self.ban_mut_general_pat(mut_span, &pat, changed_any_binding); + } + + Ok(pat.into_inner().kind) + } + + /// Recover on `mut ref? ident @ pat` and suggest + /// that the order of `mut` and `ref` is incorrect. + fn recover_mut_ref_ident(&mut self, lo: Span) -> PResult<'a, PatKind> { + let mutref_span = lo.to(self.prev_token.span); + self.struct_span_err(mutref_span, "the order of `mut` and `ref` is incorrect") + .span_suggestion( + mutref_span, + "try switching the order", + "ref mut".into(), + Applicability::MachineApplicable, + ) + .emit(); + + self.parse_pat_ident(BindingMode::ByRef(Mutability::Mut)) + } + + /// Turn all by-value immutable bindings in a pattern into mutable bindings. + /// Returns `true` if any change was made. + fn make_all_value_bindings_mutable(pat: &mut P<Pat>) -> bool { + struct AddMut(bool); + impl MutVisitor for AddMut { + fn visit_mac(&mut self, mac: &mut MacCall) { + noop_visit_mac(mac, self); + } + + fn visit_pat(&mut self, pat: &mut P<Pat>) { + if let PatKind::Ident(BindingMode::ByValue(m @ Mutability::Not), ..) = &mut pat.kind + { + self.0 = true; + *m = Mutability::Mut; + } + noop_visit_pat(pat, self); + } + } + + let mut add_mut = AddMut(false); + add_mut.visit_pat(pat); + add_mut.0 + } + + /// Error on `mut $pat` where `$pat` is not an ident. 
+ fn ban_mut_general_pat(&self, lo: Span, pat: &Pat, changed_any_binding: bool) { + let span = lo.to(pat.span); + let fix = pprust::pat_to_string(&pat); + let (problem, suggestion) = if changed_any_binding { + ("`mut` must be attached to each individual binding", "add `mut` to each binding") + } else { + ("`mut` must be followed by a named binding", "remove the `mut` prefix") + }; + self.struct_span_err(span, problem) + .span_suggestion(span, suggestion, fix, Applicability::MachineApplicable) + .note("`mut` may be followed by `variable` and `variable @ pattern`") + .emit(); + } + + /// Eat any extraneous `mut`s and error + recover if we ate any. + fn recover_additional_muts(&mut self) { + let lo = self.token.span; + while self.eat_keyword(kw::Mut) {} + if lo == self.token.span { + return; + } + + let span = lo.to(self.prev_token.span); + self.struct_span_err(span, "`mut` on a binding may not be repeated") + .span_suggestion( + span, + "remove the additional `mut`s", + String::new(), + Applicability::MachineApplicable, + ) + .emit(); + } + + /// Parse macro invocation + fn parse_pat_mac_invoc(&mut self, path: Path) -> PResult<'a, PatKind> { + self.bump(); + let args = self.parse_mac_args()?; + let mac = MacCall { path, args, prior_type_ascription: self.last_type_ascription }; + Ok(PatKind::MacCall(mac)) + } + + fn fatal_unexpected_non_pat( + &mut self, + mut err: DiagnosticBuilder<'a>, + expected: Expected, + ) -> PResult<'a, P<Pat>> { + err.cancel(); + + let expected = expected.unwrap_or("pattern"); + let msg = format!("expected {}, found {}", expected, super::token_descr(&self.token)); + + let mut err = self.struct_span_err(self.token.span, &msg); + err.span_label(self.token.span, format!("expected {}", expected)); + + let sp = self.sess.source_map().start_point(self.token.span); + if let Some(sp) = self.sess.ambiguous_block_expr_parse.borrow().get(&sp) { + self.sess.expr_parentheses_needed(&mut err, *sp, None); + } + + Err(err) + } + + /// Parses the range 
pattern end form `".." | "..." | "..=" ;`. + fn parse_range_end(&mut self) -> Option<Spanned<RangeEnd>> { + let re = if self.eat(&token::DotDotDot) { + RangeEnd::Included(RangeSyntax::DotDotDot) + } else if self.eat(&token::DotDotEq) { + RangeEnd::Included(RangeSyntax::DotDotEq) + } else if self.eat(&token::DotDot) { + self.sess.gated_spans.gate(sym::exclusive_range_pattern, self.prev_token.span); + RangeEnd::Excluded + } else { + return None; + }; + Some(respan(self.prev_token.span, re)) + } + + /// Parse a range pattern `$begin $form $end?` where `$form = ".." | "..." | "..=" ;`. + /// `$begin $form` has already been parsed. + fn parse_pat_range_begin_with( + &mut self, + begin: P<Expr>, + re: Spanned<RangeEnd>, + ) -> PResult<'a, PatKind> { + let end = if self.is_pat_range_end_start(0) { + // Parsing e.g. `X..=Y`. + Some(self.parse_pat_range_end()?) + } else { + // Parsing e.g. `X..`. + self.sess.gated_spans.gate(sym::half_open_range_patterns, begin.span.to(re.span)); + if let RangeEnd::Included(_) = re.node { + // FIXME(Centril): Consider semantic errors instead in `ast_validation`. + // Possibly also do this for `X..=` in *expression* contexts. + self.error_inclusive_range_with_no_end(re.span); + } + None + }; + Ok(PatKind::Range(Some(begin), end, re)) + } + + pub(super) fn error_inclusive_range_with_no_end(&self, span: Span) { + struct_span_err!(self.sess.span_diagnostic, span, E0586, "inclusive range with no end") + .span_suggestion_short( + span, + "use `..` instead", + "..".to_string(), + Applicability::MachineApplicable, + ) + .note("inclusive ranges must be bounded at the end (`..=b` or `a..=b`)") + .emit(); + } + + /// Parse a range-to pattern, `..X` or `..=X` where `X` remains to be parsed. + /// + /// The form `...X` is prohibited to reduce confusion with the potential + /// expression syntax `...expr` for splatting in expressions. 
+ fn parse_pat_range_to(&mut self, mut re: Spanned<RangeEnd>) -> PResult<'a, PatKind> { + let end = self.parse_pat_range_end()?; + self.sess.gated_spans.gate(sym::half_open_range_patterns, re.span.to(self.prev_token.span)); + if let RangeEnd::Included(ref mut syn @ RangeSyntax::DotDotDot) = &mut re.node { + *syn = RangeSyntax::DotDotEq; + self.struct_span_err(re.span, "range-to patterns with `...` are not allowed") + .span_suggestion_short( + re.span, + "use `..=` instead", + "..=".to_string(), + Applicability::MachineApplicable, + ) + .emit(); + } + Ok(PatKind::Range(None, Some(end), re)) + } + + /// Is the token `dist` away from the current suitable as the start of a range patterns end? + fn is_pat_range_end_start(&self, dist: usize) -> bool { + self.look_ahead(dist, |t| { + t.is_path_start() // e.g. `MY_CONST`; + || t.kind == token::Dot // e.g. `.5` for recovery; + || t.can_begin_literal_maybe_minus() // e.g. `42`. + || t.is_whole_expr() + }) + } + + fn parse_pat_range_end(&mut self) -> PResult<'a, P<Expr>> { + if self.check_path() { + let lo = self.token.span; + let (qself, path) = if self.eat_lt() { + // Parse a qualified path + let (qself, path) = self.parse_qpath(PathStyle::Expr)?; + (Some(qself), path) + } else { + // Parse an unqualified path + (None, self.parse_path(PathStyle::Expr)?) + }; + let hi = self.prev_token.span; + Ok(self.mk_expr(lo.to(hi), ExprKind::Path(qself, path), AttrVec::new())) + } else { + self.parse_literal_maybe_minus() + } + } + + /// Is this the start of a pattern beginning with a path? + fn is_start_of_pat_with_path(&mut self) -> bool { + self.check_path() + // Just for recovery (see `can_be_ident`). + || self.token.is_ident() && !self.token.is_bool_lit() && !self.token.is_keyword(kw::In) + } + + /// Would `parse_pat_ident` be appropriate here? + fn can_be_ident_pat(&mut self) -> bool { + self.check_ident() + && !self.token.is_bool_lit() // Avoid `true` or `false` as a binding as it is a literal. 
+ && !self.token.is_path_segment_keyword() // Avoid e.g. `Self` as it is a path. + // Avoid `in`. Due to recovery in the list parser this messes with `for ( $pat in $expr )`. + && !self.token.is_keyword(kw::In) + && self.look_ahead(1, |t| match t.kind { // Try to do something more complex? + token::OpenDelim(token::Paren) // A tuple struct pattern. + | token::OpenDelim(token::Brace) // A struct pattern. + | token::DotDotDot | token::DotDotEq | token::DotDot // A range pattern. + | token::ModSep // A tuple / struct variant pattern. + | token::Not => false, // A macro expanding to a pattern. + _ => true, + }) + } + + /// Parses `ident` or `ident @ pat`. + /// Used by the copy foo and ref foo patterns to give a good + /// error message when parsing mistakes like `ref foo(a, b)`. + fn parse_pat_ident(&mut self, binding_mode: BindingMode) -> PResult<'a, PatKind> { + let ident = self.parse_ident()?; + let sub = if self.eat(&token::At) { + Some(self.parse_pat(Some("binding pattern"))?) + } else { + None + }; + + // Just to be friendly, if they write something like `ref Some(i)`, + // we end up here with `(` as the current token. + // This shortly leads to a parse error. Note that if there is no explicit + // binding mode then we do not end up here, because the lookahead + // will direct us over to `parse_enum_variant()`. + if self.token == token::OpenDelim(token::Paren) { + return Err(self + .struct_span_err(self.prev_token.span, "expected identifier, found enum pattern")); + } + + Ok(PatKind::Ident(binding_mode, ident, sub)) + } + + /// Parse a struct ("record") pattern (e.g. `Foo { ... }` or `Foo::Bar { ... }`). 
+ fn parse_pat_struct(&mut self, qself: Option<QSelf>, path: Path) -> PResult<'a, PatKind> { + if qself.is_some() { + return self.error_qpath_before_pat(&path, "{"); + } + self.bump(); + let (fields, etc) = self.parse_pat_fields().unwrap_or_else(|mut e| { + e.emit(); + self.recover_stmt(); + (vec![], true) + }); + self.bump(); + Ok(PatKind::Struct(path, fields, etc)) + } + + /// Parse tuple struct or tuple variant pattern (e.g. `Foo(...)` or `Foo::Bar(...)`). + fn parse_pat_tuple_struct(&mut self, qself: Option<QSelf>, path: Path) -> PResult<'a, PatKind> { + if qself.is_some() { + return self.error_qpath_before_pat(&path, "("); + } + let (fields, _) = self.parse_paren_comma_seq(|p| p.parse_pat_with_or_inner())?; + Ok(PatKind::TupleStruct(path, fields)) + } + + /// Error when there's a qualified path, e.g. `<Foo as Bar>::Baz` + /// as the path of e.g., a tuple or record struct pattern. + fn error_qpath_before_pat(&mut self, path: &Path, token: &str) -> PResult<'a, PatKind> { + let msg = &format!("unexpected `{}` after qualified path", token); + let mut err = self.struct_span_err(self.token.span, msg); + err.span_label(self.token.span, msg); + err.span_label(path.span, "the qualified path"); + Err(err) + } + + /// Parses the fields of a struct-like pattern. 
+ fn parse_pat_fields(&mut self) -> PResult<'a, (Vec<FieldPat>, bool)> { + let mut fields = Vec::new(); + let mut etc = false; + let mut ate_comma = true; + let mut delayed_err: Option<DiagnosticBuilder<'a>> = None; + let mut etc_span = None; + + while self.token != token::CloseDelim(token::Brace) { + let attrs = match self.parse_outer_attributes() { + Ok(attrs) => attrs, + Err(err) => { + if let Some(mut delayed) = delayed_err { + delayed.emit(); + } + return Err(err); + } + }; + let lo = self.token.span; + + // check that a comma comes after every field + if !ate_comma { + let err = self.struct_span_err(self.prev_token.span, "expected `,`"); + if let Some(mut delayed) = delayed_err { + delayed.emit(); + } + return Err(err); + } + ate_comma = false; + + if self.check(&token::DotDot) || self.token == token::DotDotDot { + etc = true; + let mut etc_sp = self.token.span; + + self.recover_one_fewer_dotdot(); + self.bump(); // `..` || `...` + + if self.token == token::CloseDelim(token::Brace) { + etc_span = Some(etc_sp); + break; + } + let token_str = super::token_descr(&self.token); + let msg = &format!("expected `}}`, found {}", token_str); + let mut err = self.struct_span_err(self.token.span, msg); + + err.span_label(self.token.span, "expected `}`"); + let mut comma_sp = None; + if self.token == token::Comma { + // Issue #49257 + let nw_span = self.sess.source_map().span_until_non_whitespace(self.token.span); + etc_sp = etc_sp.to(nw_span); + err.span_label( + etc_sp, + "`..` must be at the end and cannot have a trailing comma", + ); + comma_sp = Some(self.token.span); + self.bump(); + ate_comma = true; + } + + etc_span = Some(etc_sp.until(self.token.span)); + if self.token == token::CloseDelim(token::Brace) { + // If the struct looks otherwise well formed, recover and continue. 
+ if let Some(sp) = comma_sp { + err.span_suggestion_short( + sp, + "remove this comma", + String::new(), + Applicability::MachineApplicable, + ); + } + err.emit(); + break; + } else if self.token.is_ident() && ate_comma { + // Accept fields coming after `..,`. + // This way we avoid "pattern missing fields" errors afterwards. + // We delay this error until the end in order to have a span for a + // suggested fix. + if let Some(mut delayed_err) = delayed_err { + delayed_err.emit(); + return Err(err); + } else { + delayed_err = Some(err); + } + } else { + if let Some(mut err) = delayed_err { + err.emit(); + } + return Err(err); + } + } + + fields.push(match self.parse_pat_field(lo, attrs) { + Ok(field) => field, + Err(err) => { + if let Some(mut delayed_err) = delayed_err { + delayed_err.emit(); + } + return Err(err); + } + }); + ate_comma = self.eat(&token::Comma); + } + + if let Some(mut err) = delayed_err { + if let Some(etc_span) = etc_span { + err.multipart_suggestion( + "move the `..` to the end of the field list", + vec![ + (etc_span, String::new()), + (self.token.span, format!("{}.. }}", if ate_comma { "" } else { ", " })), + ], + Applicability::MachineApplicable, + ); + } + err.emit(); + } + Ok((fields, etc)) + } + + /// Recover on `...` as if it were `..` to avoid further errors. + /// See issue #46718. + fn recover_one_fewer_dotdot(&self) { + if self.token != token::DotDotDot { + return; + } + + self.struct_span_err(self.token.span, "expected field pattern, found `...`") + .span_suggestion( + self.token.span, + "to omit remaining fields, use one fewer `.`", + "..".to_owned(), + Applicability::MachineApplicable, + ) + .emit(); + } + + fn parse_pat_field(&mut self, lo: Span, attrs: Vec<Attribute>) -> PResult<'a, FieldPat> { + // Check if a colon exists one ahead. This means we're parsing a fieldname. + let hi; + let (subpat, fieldname, is_shorthand) = if self.look_ahead(1, |t| t == &token::Colon) { + // Parsing a pattern of the form `fieldname: pat`. 
+ let fieldname = self.parse_field_name()?; + self.bump(); + let pat = self.parse_pat_with_or_inner()?; + hi = pat.span; + (pat, fieldname, false) + } else { + // Parsing a pattern of the form `(box) (ref) (mut) fieldname`. + let is_box = self.eat_keyword(kw::Box); + let boxed_span = self.token.span; + let is_ref = self.eat_keyword(kw::Ref); + let is_mut = self.eat_keyword(kw::Mut); + let fieldname = self.parse_ident()?; + hi = self.prev_token.span; + + let bind_type = match (is_ref, is_mut) { + (true, true) => BindingMode::ByRef(Mutability::Mut), + (true, false) => BindingMode::ByRef(Mutability::Not), + (false, true) => BindingMode::ByValue(Mutability::Mut), + (false, false) => BindingMode::ByValue(Mutability::Not), + }; + + let fieldpat = self.mk_pat_ident(boxed_span.to(hi), bind_type, fieldname); + let subpat = + if is_box { self.mk_pat(lo.to(hi), PatKind::Box(fieldpat)) } else { fieldpat }; + (subpat, fieldname, true) + }; + + Ok(FieldPat { + ident: fieldname, + pat: subpat, + is_shorthand, + attrs: attrs.into(), + id: ast::DUMMY_NODE_ID, + span: lo.to(hi), + is_placeholder: false, + }) + } + + pub(super) fn mk_pat_ident(&self, span: Span, bm: BindingMode, ident: Ident) -> P<Pat> { + self.mk_pat(span, PatKind::Ident(bm, ident, None)) + } + + fn mk_pat(&self, span: Span, kind: PatKind) -> P<Pat> { + P(Pat { kind, span, id: ast::DUMMY_NODE_ID, tokens: None }) + } +} diff --git a/compiler/rustc_parse/src/parser/path.rs b/compiler/rustc_parse/src/parser/path.rs new file mode 100644 index 00000000000..54b4df8613f --- /dev/null +++ b/compiler/rustc_parse/src/parser/path.rs @@ -0,0 +1,516 @@ +use super::ty::{AllowPlus, RecoverQPath}; +use super::{Parser, TokenType}; +use crate::maybe_whole; +use rustc_ast::ptr::P; +use rustc_ast::token::{self, Token}; +use rustc_ast::{ + self as ast, AngleBracketedArg, AngleBracketedArgs, GenericArg, ParenthesizedArgs, +}; +use rustc_ast::{AnonConst, AssocTyConstraint, AssocTyConstraintKind, BlockCheckMode}; +use rustc_ast::{Path, 
PathSegment, QSelf}; +use rustc_errors::{pluralize, Applicability, PResult}; +use rustc_span::source_map::{BytePos, Span}; +use rustc_span::symbol::{kw, sym, Ident}; + +use std::mem; +use tracing::debug; + +/// Specifies how to parse a path. +#[derive(Copy, Clone, PartialEq)] +pub enum PathStyle { + /// In some contexts, notably in expressions, paths with generic arguments are ambiguous + /// with something else. For example, in expressions `segment < ....` can be interpreted + /// as a comparison and `segment ( ....` can be interpreted as a function call. + /// In all such contexts the non-path interpretation is preferred by default for practical + /// reasons, but the path interpretation can be forced by the disambiguator `::`, e.g. + /// `x<y>` - comparisons, `x::<y>` - unambiguously a path. + Expr, + /// In other contexts, notably in types, no ambiguity exists and paths can be written + /// without the disambiguator, e.g., `x<y>` - unambiguously a path. + /// Paths with disambiguators are still accepted, `x::<Y>` - unambiguously a path too. + Type, + /// A path with generic arguments disallowed, e.g., `foo::bar::Baz`, used in imports, + /// visibilities or attributes. + /// Technically, this variant is unnecessary and e.g., `Expr` can be used instead + /// (paths in "mod" contexts have to be checked later for absence of generic arguments + /// anyway, due to macros), but it is used to avoid weird suggestions about expected + /// tokens when something goes wrong. + Mod, +} + +impl<'a> Parser<'a> { + /// Parses a qualified path. + /// Assumes that the leading `<` has been parsed already. 
+ /// + /// `qualified_path = <type [as trait_ref]>::path` + /// + /// # Examples + /// `<T>::default` + /// `<T as U>::a` + /// `<T as U>::F::a<S>` (without disambiguator) + /// `<T as U>::F::a::<S>` (with disambiguator) + pub(super) fn parse_qpath(&mut self, style: PathStyle) -> PResult<'a, (QSelf, Path)> { + let lo = self.prev_token.span; + let ty = self.parse_ty()?; + + // `path` will contain the prefix of the path up to the `>`, + // if any (e.g., `U` in the `<T as U>::*` examples + // above). `path_span` has the span of that path, or an empty + // span in the case of something like `<T>::Bar`. + let (mut path, path_span); + if self.eat_keyword(kw::As) { + let path_lo = self.token.span; + path = self.parse_path(PathStyle::Type)?; + path_span = path_lo.to(self.prev_token.span); + } else { + path_span = self.token.span.to(self.token.span); + path = ast::Path { segments: Vec::new(), span: path_span }; + } + + // See doc comment for `unmatched_angle_bracket_count`. + self.expect(&token::Gt)?; + if self.unmatched_angle_bracket_count > 0 { + self.unmatched_angle_bracket_count -= 1; + debug!("parse_qpath: (decrement) count={:?}", self.unmatched_angle_bracket_count); + } + + if !self.recover_colon_before_qpath_proj() { + self.expect(&token::ModSep)?; + } + + let qself = QSelf { ty, path_span, position: path.segments.len() }; + self.parse_path_segments(&mut path.segments, style)?; + + Ok((qself, Path { segments: path.segments, span: lo.to(self.prev_token.span) })) + } + + /// Recover from an invalid single colon, when the user likely meant a qualified path. + /// We avoid emitting this if not followed by an identifier, as our assumption that the user + /// intended this to be a qualified path may not be correct. 
+ /// + /// ```ignore (diagnostics) + /// <Bar as Baz<T>>:Qux + /// ^ help: use double colon + /// ``` + fn recover_colon_before_qpath_proj(&mut self) -> bool { + if self.token.kind != token::Colon + || self.look_ahead(1, |t| !t.is_ident() || t.is_reserved_ident()) + { + return false; + } + + self.bump(); // colon + + self.diagnostic() + .struct_span_err( + self.prev_token.span, + "found single colon before projection in qualified path", + ) + .span_suggestion( + self.prev_token.span, + "use double colon", + "::".to_string(), + Applicability::MachineApplicable, + ) + .emit(); + + true + } + + /// Parses simple paths. + /// + /// `path = [::] segment+` + /// `segment = ident | ident[::]<args> | ident[::](args) [-> type]` + /// + /// # Examples + /// `a::b::C<D>` (without disambiguator) + /// `a::b::C::<D>` (with disambiguator) + /// `Fn(Args)` (without disambiguator) + /// `Fn::(Args)` (with disambiguator) + pub(super) fn parse_path(&mut self, style: PathStyle) -> PResult<'a, Path> { + maybe_whole!(self, NtPath, |path| { + if style == PathStyle::Mod && path.segments.iter().any(|segment| segment.args.is_some()) + { + self.struct_span_err(path.span, "unexpected generic arguments in path").emit(); + } + path + }); + + let lo = self.token.span; + let mut segments = Vec::new(); + let mod_sep_ctxt = self.token.span.ctxt(); + if self.eat(&token::ModSep) { + segments.push(PathSegment::path_root(lo.shrink_to_lo().with_ctxt(mod_sep_ctxt))); + } + self.parse_path_segments(&mut segments, style)?; + + Ok(Path { segments, span: lo.to(self.prev_token.span) }) + } + + pub(super) fn parse_path_segments( + &mut self, + segments: &mut Vec<PathSegment>, + style: PathStyle, + ) -> PResult<'a, ()> { + loop { + let segment = self.parse_path_segment(style)?; + if style == PathStyle::Expr { + // In order to check for trailing angle brackets, we must have finished + // recursing (`parse_path_segment` can indirectly call this function), + // that is, the next token must be the highlighted 
part of the below example: + // + // `Foo::<Bar as Baz<T>>::Qux` + // ^ here + // + // As opposed to the below highlight (if we had only finished the first + // recursion): + // + // `Foo::<Bar as Baz<T>>::Qux` + // ^ here + // + // `PathStyle::Expr` is only provided at the root invocation and never in + // `parse_path_segment` to recurse and therefore can be checked to maintain + // this invariant. + self.check_trailing_angle_brackets(&segment, &[&token::ModSep]); + } + segments.push(segment); + + if self.is_import_coupler() || !self.eat(&token::ModSep) { + return Ok(()); + } + } + } + + pub(super) fn parse_path_segment(&mut self, style: PathStyle) -> PResult<'a, PathSegment> { + let ident = self.parse_path_segment_ident()?; + + let is_args_start = |token: &Token| match token.kind { + token::Lt + | token::BinOp(token::Shl) + | token::OpenDelim(token::Paren) + | token::LArrow => true, + _ => false, + }; + let check_args_start = |this: &mut Self| { + this.expected_tokens.extend_from_slice(&[ + TokenType::Token(token::Lt), + TokenType::Token(token::OpenDelim(token::Paren)), + ]); + is_args_start(&this.token) + }; + + Ok( + if style == PathStyle::Type && check_args_start(self) + || style != PathStyle::Mod + && self.check(&token::ModSep) + && self.look_ahead(1, |t| is_args_start(t)) + { + // We use `style == PathStyle::Expr` to check if this is in a recursion or not. If + // it isn't, then we reset the unmatched angle bracket count as we're about to start + // parsing a new path. + if style == PathStyle::Expr { + self.unmatched_angle_bracket_count = 0; + self.max_angle_bracket_count = 0; + } + + // Generic arguments are found - `<`, `(`, `::<` or `::(`. 
+ self.eat(&token::ModSep); + let lo = self.token.span; + let args = if self.eat_lt() { + // `<'a, T, A = U>` + let args = + self.parse_angle_args_with_leading_angle_bracket_recovery(style, lo)?; + self.expect_gt()?; + let span = lo.to(self.prev_token.span); + AngleBracketedArgs { args, span }.into() + } else { + // `(T, U) -> R` + let (inputs, _) = self.parse_paren_comma_seq(|p| p.parse_ty())?; + let span = ident.span.to(self.prev_token.span); + let output = self.parse_ret_ty(AllowPlus::No, RecoverQPath::No)?; + ParenthesizedArgs { inputs, output, span }.into() + }; + + PathSegment { ident, args, id: ast::DUMMY_NODE_ID } + } else { + // Generic arguments are not found. + PathSegment::from_ident(ident) + }, + ) + } + + pub(super) fn parse_path_segment_ident(&mut self) -> PResult<'a, Ident> { + match self.token.ident() { + Some((ident, false)) if ident.is_path_segment_keyword() => { + self.bump(); + Ok(ident) + } + _ => self.parse_ident(), + } + } + + /// Parses generic args (within a path segment) with recovery for extra leading angle brackets. + /// For the purposes of understanding the parsing logic of generic arguments, this function + /// can be thought of being the same as just calling `self.parse_angle_args()` if the source + /// had the correct amount of leading angle brackets. + /// + /// ```ignore (diagnostics) + /// bar::<<<<T as Foo>::Output>(); + /// ^^ help: remove extra angle brackets + /// ``` + fn parse_angle_args_with_leading_angle_bracket_recovery( + &mut self, + style: PathStyle, + lo: Span, + ) -> PResult<'a, Vec<AngleBracketedArg>> { + // We need to detect whether there are extra leading left angle brackets and produce an + // appropriate error and suggestion. This cannot be implemented by looking ahead at + // upcoming tokens for a matching `>` character - if there are unmatched `<` tokens + // then there won't be matching `>` tokens to find. 
+ // + // To explain how this detection works, consider the following example: + // + // ```ignore (diagnostics) + // bar::<<<<T as Foo>::Output>(); + // ^^ help: remove extra angle brackets + // ``` + // + // Parsing of the left angle brackets starts in this function. We start by parsing the + // `<` token (incrementing the counter of unmatched angle brackets on `Parser` via + // `eat_lt`): + // + // *Upcoming tokens:* `<<<<T as Foo>::Output>;` + // *Unmatched count:* 1 + // *`parse_path_segment` calls deep:* 0 + // + // This has the effect of recursing as this function is called if a `<` character + // is found within the expected generic arguments: + // + // *Upcoming tokens:* `<<<T as Foo>::Output>;` + // *Unmatched count:* 2 + // *`parse_path_segment` calls deep:* 1 + // + // Eventually we will have recursed until having consumed all of the `<` tokens and + // this will be reflected in the count: + // + // *Upcoming tokens:* `T as Foo>::Output>;` + // *Unmatched count:* 4 + // `parse_path_segment` calls deep:* 3 + // + // The parser will continue until reaching the first `>` - this will decrement the + // unmatched angle bracket count and return to the parent invocation of this function + // having succeeded in parsing: + // + // *Upcoming tokens:* `::Output>;` + // *Unmatched count:* 3 + // *`parse_path_segment` calls deep:* 2 + // + // This will continue until the next `>` character which will also return successfully + // to the parent invocation of this function and decrement the count: + // + // *Upcoming tokens:* `;` + // *Unmatched count:* 2 + // *`parse_path_segment` calls deep:* 1 + // + // At this point, this function will expect to find another matching `>` character but + // won't be able to and will return an error. 
This will continue all the way up the + // call stack until the first invocation: + // + // *Upcoming tokens:* `;` + // *Unmatched count:* 2 + // *`parse_path_segment` calls deep:* 0 + // + // In doing this, we have managed to work out how many unmatched leading left angle + // brackets there are, but we cannot recover as the unmatched angle brackets have + // already been consumed. To remedy this, we keep a snapshot of the parser state + // before we do the above. We can then inspect whether we ended up with a parsing error + // and unmatched left angle brackets and if so, restore the parser state before we + // consumed any `<` characters to emit an error and consume the erroneous tokens to + // recover by attempting to parse again. + // + // In practice, the recursion of this function is indirect and there will be other + // locations that consume some `<` characters - as long as we update the count when + // this happens, it isn't an issue. + + let is_first_invocation = style == PathStyle::Expr; + // Take a snapshot before attempting to parse - we can restore this later. + let snapshot = if is_first_invocation { Some(self.clone()) } else { None }; + + debug!("parse_generic_args_with_leading_angle_bracket_recovery: (snapshotting)"); + match self.parse_angle_args() { + Ok(args) => Ok(args), + Err(ref mut e) if is_first_invocation && self.unmatched_angle_bracket_count > 0 => { + // Cancel error from being unable to find `>`. We know the error + // must have been this due to a non-zero unmatched angle bracket + // count. + e.cancel(); + + // Swap `self` with our backup of the parser state before attempting to parse + // generic arguments. + let snapshot = mem::replace(self, snapshot.unwrap()); + + debug!( + "parse_generic_args_with_leading_angle_bracket_recovery: (snapshot failure) \ + snapshot.count={:?}", + snapshot.unmatched_angle_bracket_count, + ); + + // Eat the unmatched angle brackets. 
+ for _ in 0..snapshot.unmatched_angle_bracket_count { + self.eat_lt(); + } + + // Make a span over ${unmatched angle bracket count} characters. + let span = lo.with_hi(lo.lo() + BytePos(snapshot.unmatched_angle_bracket_count)); + self.struct_span_err( + span, + &format!( + "unmatched angle bracket{}", + pluralize!(snapshot.unmatched_angle_bracket_count) + ), + ) + .span_suggestion( + span, + &format!( + "remove extra angle bracket{}", + pluralize!(snapshot.unmatched_angle_bracket_count) + ), + String::new(), + Applicability::MachineApplicable, + ) + .emit(); + + // Try again without unmatched angle bracket characters. + self.parse_angle_args() + } + Err(e) => Err(e), + } + } + + /// Parses (possibly empty) list of generic arguments / associated item constraints, + /// possibly including trailing comma. + pub(super) fn parse_angle_args(&mut self) -> PResult<'a, Vec<AngleBracketedArg>> { + let mut args = Vec::new(); + while let Some(arg) = self.parse_angle_arg()? { + args.push(arg); + if !self.eat(&token::Comma) { + break; + } + } + Ok(args) + } + + /// Parses a single argument in the angle arguments `<...>` of a path segment. + fn parse_angle_arg(&mut self) -> PResult<'a, Option<AngleBracketedArg>> { + if self.check_ident() && self.look_ahead(1, |t| matches!(t.kind, token::Eq | token::Colon)) + { + // Parse associated type constraint. + let lo = self.token.span; + let ident = self.parse_ident()?; + let kind = if self.eat(&token::Eq) { + let ty = self.parse_assoc_equality_term(ident, self.prev_token.span)?; + AssocTyConstraintKind::Equality { ty } + } else if self.eat(&token::Colon) { + let bounds = self.parse_generic_bounds(Some(self.prev_token.span))?; + AssocTyConstraintKind::Bound { bounds } + } else { + unreachable!(); + }; + + let span = lo.to(self.prev_token.span); + + // Gate associated type bounds, e.g., `Iterator<Item: Ord>`. + if let AssocTyConstraintKind::Bound { .. 
} = kind { + self.sess.gated_spans.gate(sym::associated_type_bounds, span); + } + + let constraint = AssocTyConstraint { id: ast::DUMMY_NODE_ID, ident, kind, span }; + Ok(Some(AngleBracketedArg::Constraint(constraint))) + } else { + Ok(self.parse_generic_arg()?.map(AngleBracketedArg::Arg)) + } + } + + /// Parse the term to the right of an associated item equality constraint. + /// That is, parse `<term>` in `Item = <term>`. + /// Right now, this only admits types in `<term>`. + fn parse_assoc_equality_term(&mut self, ident: Ident, eq: Span) -> PResult<'a, P<ast::Ty>> { + let arg = self.parse_generic_arg()?; + let span = ident.span.to(self.prev_token.span); + match arg { + Some(GenericArg::Type(ty)) => return Ok(ty), + Some(GenericArg::Const(expr)) => { + self.struct_span_err(span, "cannot constrain an associated constant to a value") + .span_label(ident.span, "this associated constant...") + .span_label(expr.value.span, "...cannot be constrained to this value") + .emit(); + } + Some(GenericArg::Lifetime(lt)) => { + self.struct_span_err(span, "associated lifetimes are not supported") + .span_label(lt.ident.span, "the lifetime is given here") + .help("if you meant to specify a trait object, write `dyn Trait + 'lifetime`") + .emit(); + } + None => { + let after_eq = eq.shrink_to_hi(); + let before_next = self.token.span.shrink_to_lo(); + self.struct_span_err(after_eq.to(before_next), "missing type to the right of `=`") + .span_suggestion( + self.sess.source_map().next_point(eq).to(before_next), + "to constrain the associated type, add a type after `=`", + " TheType".to_string(), + Applicability::HasPlaceholders, + ) + .span_suggestion( + eq.to(before_next), + &format!("remove the `=` if `{}` is a type", ident), + String::new(), + Applicability::MaybeIncorrect, + ) + .emit(); + } + } + Ok(self.mk_ty(span, ast::TyKind::Err)) + } + + /// Parse a generic argument in a path segment. 
+ /// This does not include constraints, e.g., `Item = u8`, which is handled in `parse_angle_arg`. + fn parse_generic_arg(&mut self) -> PResult<'a, Option<GenericArg>> { + let arg = if self.check_lifetime() && self.look_ahead(1, |t| !t.is_like_plus()) { + // Parse lifetime argument. + GenericArg::Lifetime(self.expect_lifetime()) + } else if self.check_const_arg() { + // Parse const argument. + let expr = if let token::OpenDelim(token::Brace) = self.token.kind { + self.parse_block_expr( + None, + self.token.span, + BlockCheckMode::Default, + ast::AttrVec::new(), + )? + } else if self.token.is_ident() { + // FIXME(const_generics): to distinguish between idents for types and consts, + // we should introduce a GenericArg::Ident in the AST and distinguish when + // lowering to the HIR. For now, idents for const args are not permitted. + if self.token.is_bool_lit() { + self.parse_literal_maybe_minus()? + } else { + let span = self.token.span; + let msg = "identifiers may currently not be used for const generics"; + self.struct_span_err(span, msg).emit(); + let block = self.mk_block_err(span); + self.mk_expr(span, ast::ExprKind::Block(block, None), ast::AttrVec::new()) + } + } else { + self.parse_literal_maybe_minus()? + }; + GenericArg::Const(AnonConst { id: ast::DUMMY_NODE_ID, value: expr }) + } else if self.check_type() { + // Parse type argument. + GenericArg::Type(self.parse_ty()?) 
+ } else { + return Ok(None); + }; + Ok(Some(arg)) + } +} diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs new file mode 100644 index 00000000000..ac067cb0eab --- /dev/null +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -0,0 +1,427 @@ +use super::attr::DEFAULT_INNER_ATTR_FORBIDDEN; +use super::diagnostics::Error; +use super::expr::LhsExpr; +use super::pat::GateOr; +use super::path::PathStyle; +use super::{BlockMode, Parser, Restrictions, SemiColonMode}; +use crate::maybe_whole; + +use rustc_ast as ast; +use rustc_ast::ptr::P; +use rustc_ast::token::{self, TokenKind}; +use rustc_ast::util::classify; +use rustc_ast::{AttrStyle, AttrVec, Attribute, MacCall, MacStmtStyle}; +use rustc_ast::{Block, BlockCheckMode, Expr, ExprKind, Local, Stmt, StmtKind, DUMMY_NODE_ID}; +use rustc_errors::{Applicability, PResult}; +use rustc_span::source_map::{BytePos, Span}; +use rustc_span::symbol::{kw, sym}; + +use std::mem; + +impl<'a> Parser<'a> { + /// Parses a statement. This stops just before trailing semicolons on everything but items. + /// e.g., a `StmtKind::Semi` parses to a `StmtKind::Expr`, leaving the trailing `;` unconsumed. + pub(super) fn parse_stmt(&mut self) -> PResult<'a, Option<Stmt>> { + Ok(self.parse_stmt_without_recovery().unwrap_or_else(|mut e| { + e.emit(); + self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore); + None + })) + } + + fn parse_stmt_without_recovery(&mut self) -> PResult<'a, Option<Stmt>> { + maybe_whole!(self, NtStmt, |x| Some(x)); + + let attrs = self.parse_outer_attributes()?; + let lo = self.token.span; + + let stmt = if self.eat_keyword(kw::Let) { + self.parse_local_mk(lo, attrs.into())? + } else if self.is_kw_followed_by_ident(kw::Mut) { + self.recover_stmt_local(lo, attrs.into(), "missing keyword", "let mut")? 
+ } else if self.is_kw_followed_by_ident(kw::Auto) { + self.bump(); // `auto` + let msg = "write `let` instead of `auto` to introduce a new variable"; + self.recover_stmt_local(lo, attrs.into(), msg, "let")? + } else if self.is_kw_followed_by_ident(sym::var) { + self.bump(); // `var` + let msg = "write `let` instead of `var` to introduce a new variable"; + self.recover_stmt_local(lo, attrs.into(), msg, "let")? + } else if self.check_path() && !self.token.is_qpath_start() && !self.is_path_start_item() { + // We have avoided contextual keywords like `union`, items with `crate` visibility, + // or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something + // that starts like a path (1 token), but it fact not a path. + // Also, we avoid stealing syntax from `parse_item_`. + self.parse_stmt_path_start(lo, attrs)? + } else if let Some(item) = self.parse_item_common(attrs.clone(), false, true, |_| true)? { + // FIXME: Bad copy of attrs + self.mk_stmt(lo.to(item.span), StmtKind::Item(P(item))) + } else if self.eat(&token::Semi) { + // Do not attempt to parse an expression if we're done here. + self.error_outer_attrs(&attrs); + self.mk_stmt(lo, StmtKind::Empty) + } else if self.token != token::CloseDelim(token::Brace) { + // Remainder are line-expr stmts. + let e = self.parse_expr_res(Restrictions::STMT_EXPR, Some(attrs.into()))?; + self.mk_stmt(lo.to(e.span), StmtKind::Expr(e)) + } else { + self.error_outer_attrs(&attrs); + return Ok(None); + }; + Ok(Some(stmt)) + } + + fn parse_stmt_path_start(&mut self, lo: Span, attrs: Vec<Attribute>) -> PResult<'a, Stmt> { + let path = self.parse_path(PathStyle::Expr)?; + + if self.eat(&token::Not) { + return self.parse_stmt_mac(lo, attrs.into(), path); + } + + let expr = if self.check(&token::OpenDelim(token::Brace)) { + self.parse_struct_expr(path, AttrVec::new())? 
+ } else { + let hi = self.prev_token.span; + self.mk_expr(lo.to(hi), ExprKind::Path(None, path), AttrVec::new()) + }; + + let expr = self.with_res(Restrictions::STMT_EXPR, |this| { + let expr = this.parse_dot_or_call_expr_with(expr, lo, attrs.into())?; + this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr)) + })?; + Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Expr(expr))) + } + + /// Parses a statement macro `mac!(args)` provided a `path` representing `mac`. + /// At this point, the `!` token after the path has already been eaten. + fn parse_stmt_mac(&mut self, lo: Span, attrs: AttrVec, path: ast::Path) -> PResult<'a, Stmt> { + let args = self.parse_mac_args()?; + let delim = args.delim(); + let hi = self.prev_token.span; + + let style = + if delim == token::Brace { MacStmtStyle::Braces } else { MacStmtStyle::NoBraces }; + + let mac = MacCall { path, args, prior_type_ascription: self.last_type_ascription }; + + let kind = if delim == token::Brace || self.token == token::Semi || self.token == token::Eof + { + StmtKind::MacCall(P((mac, style, attrs))) + } else { + // Since none of the above applied, this is an expression statement macro. + let e = self.mk_expr(lo.to(hi), ExprKind::MacCall(mac), AttrVec::new()); + let e = self.maybe_recover_from_bad_qpath(e, true)?; + let e = self.parse_dot_or_call_expr_with(e, lo, attrs)?; + let e = self.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(e))?; + StmtKind::Expr(e) + }; + Ok(self.mk_stmt(lo.to(hi), kind)) + } + + /// Error on outer attributes in this context. + /// Also error if the previous token was a doc comment. 
+ fn error_outer_attrs(&self, attrs: &[Attribute]) { + if let [.., last] = attrs { + if last.is_doc_comment() { + self.span_fatal_err(last.span, Error::UselessDocComment).emit(); + } else if attrs.iter().any(|a| a.style == AttrStyle::Outer) { + self.struct_span_err(last.span, "expected statement after outer attribute").emit(); + } + } + } + + fn recover_stmt_local( + &mut self, + lo: Span, + attrs: AttrVec, + msg: &str, + sugg: &str, + ) -> PResult<'a, Stmt> { + let stmt = self.parse_local_mk(lo, attrs)?; + self.struct_span_err(lo, "invalid variable declaration") + .span_suggestion(lo, msg, sugg.to_string(), Applicability::MachineApplicable) + .emit(); + Ok(stmt) + } + + fn parse_local_mk(&mut self, lo: Span, attrs: AttrVec) -> PResult<'a, Stmt> { + let local = self.parse_local(attrs)?; + Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Local(local))) + } + + /// Parses a local variable declaration. + fn parse_local(&mut self, attrs: AttrVec) -> PResult<'a, P<Local>> { + let lo = self.prev_token.span; + let pat = self.parse_top_pat(GateOr::Yes)?; + + let (err, ty) = if self.eat(&token::Colon) { + // Save the state of the parser before parsing type normally, in case there is a `:` + // instead of an `=` typo. + let parser_snapshot_before_type = self.clone(); + let colon_sp = self.prev_token.span; + match self.parse_ty() { + Ok(ty) => (None, Some(ty)), + Err(mut err) => { + if let Ok(snip) = self.span_to_snippet(pat.span) { + err.span_label(pat.span, format!("while parsing the type for `{}`", snip)); + } + let err = if self.check(&token::Eq) { + err.emit(); + None + } else { + // Rewind to before attempting to parse the type and continue parsing. 
                        // Restore the parser state from before the type annotation was
                        // attempted; keep the snapshot (and the error) so the initializer
                        // recovery below can decide how to report it.
                        let parser_snapshot_after_type =
                            mem::replace(self, parser_snapshot_before_type);
                        Some((parser_snapshot_after_type, colon_sp, err))
                    };
                    (err, None)
                }
            }
        } else {
            (None, None)
        };
        // Combine the outcome of the initializer parse with the (possibly failed)
        // type annotation parse, recovering from each of the four combinations.
        let init = match (self.parse_initializer(err.is_some()), err) {
            (Ok(init), None) => {
                // init parsed, ty parsed
                init
            }
            (Ok(init), Some((_, colon_sp, mut err))) => {
                // init parsed, ty error
                // Could parse the type as if it were the initializer, it is likely there was a
                // typo in the code: `:` instead of `=`. Add suggestion and emit the error.
                err.span_suggestion_short(
                    colon_sp,
                    "use `=` if you meant to assign",
                    " =".to_string(),
                    Applicability::MachineApplicable,
                );
                err.emit();
                // As this was parsed successfully, continue as if the code has been fixed for the
                // rest of the file. It will still fail due to the emitted error, but we avoid
                // extra noise.
                init
            }
            (Err(mut init_err), Some((snapshot, _, ty_err))) => {
                // init error, ty error
                init_err.cancel();
                // Couldn't parse the type nor the initializer, only raise the type error and
                // return to the parser state before parsing the type as the initializer.
                // let x: <parse_error>;
                *self = snapshot;
                return Err(ty_err);
            }
            (Err(err), None) => {
                // init error, ty parsed
                // Couldn't parse the initializer and we're not attempting to recover a failed
                // parse of the type, return the error.
                return Err(err);
            }
        };
        let hi = if self.token == token::Semi { self.token.span } else { self.prev_token.span };
        Ok(P(ast::Local { ty, pat, init, id: DUMMY_NODE_ID, span: lo.to(hi), attrs }))
    }

    /// Parses the RHS of a local variable declaration (e.g., '= 14;').
    ///
    /// If `eq_optional` is `true` (set when the type annotation failed to parse),
    /// the initializer expression is parsed even when no `=` token was consumed.
    fn parse_initializer(&mut self, eq_optional: bool) -> PResult<'a, Option<P<Expr>>> {
        let eq_consumed = match self.token.kind {
            token::BinOpEq(..) => {
                // Recover `let x <op>= 1` as `let x = 1`
                self.struct_span_err(
                    self.token.span,
                    "can't reassign to an uninitialized variable",
                )
                .span_suggestion_short(
                    self.token.span,
                    "initialize the variable",
                    "=".to_string(),
                    Applicability::MaybeIncorrect,
                )
                .emit();
                self.bump();
                true
            }
            _ => self.eat(&token::Eq),
        };

        Ok(if eq_consumed || eq_optional { Some(self.parse_expr()?) } else { None })
    }

    /// Parses a block. No inner attributes are allowed.
    pub(super) fn parse_block(&mut self) -> PResult<'a, P<Block>> {
        let (attrs, block) = self.parse_inner_attrs_and_block()?;
        if let [.., last] = &*attrs {
            self.error_on_forbidden_inner_attr(last.span, DEFAULT_INNER_ATTR_FORBIDDEN);
        }
        Ok(block)
    }

    /// Emits an "expected `{`" error, attempting to recover from C-style
    /// headers like `if (cond) stmt;` by suggesting a block around the statement.
    fn error_block_no_opening_brace<T>(&mut self) -> PResult<'a, T> {
        let sp = self.token.span;
        let tok = super::token_descr(&self.token);
        let mut e = self.struct_span_err(sp, &format!("expected `{{`, found {}", tok));
        let do_not_suggest_help = self.token.is_keyword(kw::In) || self.token == token::Colon;

        // Check to see if the user has written something like
        //
        //    if (cond)
        //      bar;
        //
        // which is valid in other languages, but not Rust.
        match self.parse_stmt_without_recovery() {
            // If the next token is an open brace (e.g., `if a b {`), the place-
            // inside-a-block suggestion would be more likely wrong than right.
            Ok(Some(_))
                if self.look_ahead(1, |t| t == &token::OpenDelim(token::Brace))
                    || do_not_suggest_help => {}
            Ok(Some(stmt)) => {
                let stmt_own_line = self.sess.source_map().is_line_before_span_empty(sp);
                let stmt_span = if stmt_own_line && self.eat(&token::Semi) {
                    // Expand the span to include the semicolon.
                    stmt.span.with_hi(self.prev_token.span.hi())
                } else {
                    stmt.span
                };
                if let Ok(snippet) = self.span_to_snippet(stmt_span) {
                    e.span_suggestion(
                        stmt_span,
                        "try placing this code inside a block",
                        format!("{{ {} }}", snippet),
                        // Speculative; has been misleading in the past (#46836).
                        Applicability::MaybeIncorrect,
                    );
                }
            }
            Err(mut e) => {
                self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore);
                e.cancel();
            }
            _ => {}
        }
        e.span_label(sp, "expected `{`");
        Err(e)
    }

    /// Parses a block. Inner attributes are allowed.
    pub(super) fn parse_inner_attrs_and_block(
        &mut self,
    ) -> PResult<'a, (Vec<Attribute>, P<Block>)> {
        self.parse_block_common(self.token.span, BlockCheckMode::Default)
    }

    /// Parses a block. Inner attributes are allowed.
    pub(super) fn parse_block_common(
        &mut self,
        lo: Span,
        blk_mode: BlockCheckMode,
    ) -> PResult<'a, (Vec<Attribute>, P<Block>)> {
        maybe_whole!(self, NtBlock, |x| (Vec::new(), x));

        if !self.eat(&token::OpenDelim(token::Brace)) {
            return self.error_block_no_opening_brace();
        }

        Ok((self.parse_inner_attributes()?, self.parse_block_tail(lo, blk_mode)?))
    }

    /// Parses the rest of a block expression or function body.
    /// Precondition: already parsed the '{'.
    fn parse_block_tail(&mut self, lo: Span, s: BlockCheckMode) -> PResult<'a, P<Block>> {
        let mut stmts = vec![];
        while !self.eat(&token::CloseDelim(token::Brace)) {
            if self.token == token::Eof {
                break;
            }
            let stmt = match self.parse_full_stmt() {
                // On error, emit, skip ahead, and substitute an error statement so
                // that the rest of the block can still be parsed.
                Err(mut err) => {
                    self.maybe_annotate_with_ascription(&mut err, false);
                    err.emit();
                    self.recover_stmt_(SemiColonMode::Ignore, BlockMode::Ignore);
                    Some(self.mk_stmt_err(self.token.span))
                }
                Ok(stmt) => stmt,
            };
            if let Some(stmt) = stmt {
                stmts.push(stmt);
            } else {
                // Found only `;` or `}`.
                continue;
            };
        }
        Ok(self.mk_block(stmts, s, lo.to(self.prev_token.span)))
    }

    /// Parses a statement, including the trailing semicolon.
    pub fn parse_full_stmt(&mut self) -> PResult<'a, Option<Stmt>> {
        // Skip looking for a trailing semicolon when we have an interpolated statement.
        maybe_whole!(self, NtStmt, |x| Some(x));

        let mut stmt = match self.parse_stmt_without_recovery()? {
            Some(stmt) => stmt,
            None => return Ok(None),
        };

        let mut eat_semi = true;
        match stmt.kind {
            // Expression without semicolon.
            StmtKind::Expr(ref expr)
                if self.token != token::Eof && classify::expr_requires_semi_to_be_stmt(expr) =>
            {
                // Just check for errors and recover; do not eat semicolon yet.
                if let Err(mut e) =
                    self.expect_one_of(&[], &[token::Semi, token::CloseDelim(token::Brace)])
                {
                    if let TokenKind::DocComment(..) = self.token.kind {
                        if let Ok(snippet) = self.span_to_snippet(self.token.span) {
                            let sp = self.token.span;
                            // A doc comment here (`///`/`//!`) was likely meant to be a
                            // regular comment; suggest inserting a space after `//`.
                            let marker = &snippet[..3];
                            let (comment_marker, doc_comment_marker) = marker.split_at(2);

                            e.span_suggestion(
                                sp.with_hi(sp.lo() + BytePos(marker.len() as u32)),
                                &format!(
                                    "add a space before `{}` to use a regular comment",
                                    doc_comment_marker,
                                ),
                                format!("{} {}", comment_marker, doc_comment_marker),
                                Applicability::MaybeIncorrect,
                            );
                        }
                    }
                    e.emit();
                    self.recover_stmt();
                    // Don't complain about type errors in body tail after parse error (#57383).
                    let sp = expr.span.to(self.prev_token.span);
                    stmt.kind = StmtKind::Expr(self.mk_expr_err(sp));
                }
            }
            StmtKind::Local(..)
            => {
                // A `let` statement requires a terminating semicolon.
                self.expect_semi()?;
                eat_semi = false;
            }
            StmtKind::Empty => eat_semi = false,
            _ => {}
        }

        if eat_semi && self.eat(&token::Semi) {
            stmt = stmt.add_trailing_semicolon();
        }
        stmt.span = stmt.span.to(self.prev_token.span);
        Ok(Some(stmt))
    }

    pub(super) fn mk_block(&self, stmts: Vec<Stmt>, rules: BlockCheckMode, span: Span) -> P<Block> {
        P(Block { stmts, id: DUMMY_NODE_ID, rules, span })
    }

    pub(super) fn mk_stmt(&self, span: Span, kind: StmtKind) -> Stmt {
        Stmt { id: DUMMY_NODE_ID, kind, span }
    }

    /// Makes a statement wrapping an error expression, for recovery.
    fn mk_stmt_err(&self, span: Span) -> Stmt {
        self.mk_stmt(span, StmtKind::Expr(self.mk_expr_err(span)))
    }

    pub(super) fn mk_block_err(&self, span: Span) -> P<Block> {
        self.mk_block(vec![self.mk_stmt_err(span)], BlockCheckMode::Default, span)
    }
}
diff --git a/compiler/rustc_parse/src/parser/ty.rs b/compiler/rustc_parse/src/parser/ty.rs
new file mode 100644
index 00000000000..4356850818e
--- /dev/null
+++ b/compiler/rustc_parse/src/parser/ty.rs
@@ -0,0 +1,631 @@
use super::{Parser, PathStyle, TokenType};

use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole};

use rustc_ast::ptr::P;
use rustc_ast::token::{self, Token, TokenKind};
use rustc_ast::{self as ast, BareFnTy, FnRetTy, GenericParam, Lifetime, MutTy, Ty, TyKind};
use rustc_ast::{GenericBound, GenericBounds, MacCall, Mutability};
use rustc_ast::{PolyTraitRef, TraitBoundModifier, TraitObjectSyntax};
use rustc_errors::{pluralize, struct_span_err, Applicability, PResult};
use rustc_span::source_map::Span;
use rustc_span::symbol::{kw, sym};

/// Any `?` or `?const` modifiers that appear at the start of a bound.
struct BoundModifiers {
    /// `?Trait`.
    maybe: Option<Span>,

    /// `?const Trait`.
    maybe_const: Option<Span>,
}

impl BoundModifiers {
    /// Lowers the parsed `?`/`?const` markers to the AST's `TraitBoundModifier`.
    fn to_trait_bound_modifier(&self) -> TraitBoundModifier {
        match (self.maybe, self.maybe_const) {
            (None, None) => TraitBoundModifier::None,
            (Some(_), None) => TraitBoundModifier::Maybe,
            (None, Some(_)) => TraitBoundModifier::MaybeConst,
            (Some(_), Some(_)) => TraitBoundModifier::MaybeConstMaybe,
        }
    }
}

/// Is `+` permitted at this level of type parsing?
#[derive(Copy, Clone, PartialEq)]
pub(super) enum AllowPlus {
    Yes,
    No,
}

#[derive(PartialEq)]
pub(super) enum RecoverQPath {
    Yes,
    No,
}

// Is `...` (`CVarArgs`) legal at this level of type parsing?
#[derive(PartialEq)]
enum AllowCVariadic {
    Yes,
    No,
}

/// Returns `true` if `IDENT t` can start a type -- `IDENT::a::b`, `IDENT<u8, u8>`,
/// `IDENT<<u8 as Trait>::AssocTy>`.
///
/// Types can also be of the form `IDENT(u8, u8) -> u8`, however this assumes
/// that `IDENT` is not the ident of a fn trait.
fn can_continue_type_after_non_fn_ident(t: &Token) -> bool {
    t == &token::ModSep || t == &token::Lt || t == &token::BinOp(token::Shl)
}

impl<'a> Parser<'a> {
    /// Parses a type.
    pub fn parse_ty(&mut self) -> PResult<'a, P<Ty>> {
        self.parse_ty_common(AllowPlus::Yes, RecoverQPath::Yes, AllowCVariadic::No)
    }

    /// Parse a type suitable for a function or function pointer parameter.
    /// The difference from `parse_ty` is that this version allows `...`
    /// (`CVarArgs`) at the top level of the type.
    pub(super) fn parse_ty_for_param(&mut self) -> PResult<'a, P<Ty>> {
        self.parse_ty_common(AllowPlus::Yes, RecoverQPath::Yes, AllowCVariadic::Yes)
    }

    /// Parses a type in restricted contexts where `+` is not permitted.
    ///
    /// Example 1: `&'a TYPE`
    ///     `+` is prohibited to maintain operator priority (P(+) < P(&)).
    /// Example 2: `value1 as TYPE + value2`
    ///     `+` is prohibited to avoid interactions with expression grammar.
    pub(super) fn parse_ty_no_plus(&mut self) -> PResult<'a, P<Ty>> {
        self.parse_ty_common(AllowPlus::No, RecoverQPath::Yes, AllowCVariadic::No)
    }

    /// Parses an optional return type `[ -> TY ]` in a function declaration.
    pub(super) fn parse_ret_ty(
        &mut self,
        allow_plus: AllowPlus,
        recover_qpath: RecoverQPath,
    ) -> PResult<'a, FnRetTy> {
        Ok(if self.eat(&token::RArrow) {
            // FIXME(Centril): Can we unconditionally `allow_plus`?
            let ty = self.parse_ty_common(allow_plus, recover_qpath, AllowCVariadic::No)?;
            FnRetTy::Ty(ty)
        } else {
            FnRetTy::Default(self.token.span.shrink_to_lo())
        })
    }

    /// The central type parser: dispatches on the leading token(s) to the
    /// specialized `parse_*` helpers below, then applies `+`/qpath recovery.
    fn parse_ty_common(
        &mut self,
        allow_plus: AllowPlus,
        recover_qpath: RecoverQPath,
        allow_c_variadic: AllowCVariadic,
    ) -> PResult<'a, P<Ty>> {
        let allow_qpath_recovery = recover_qpath == RecoverQPath::Yes;
        maybe_recover_from_interpolated_ty_qpath!(self, allow_qpath_recovery);
        maybe_whole!(self, NtTy, |x| x);

        let lo = self.token.span;
        let mut impl_dyn_multi = false;
        let kind = if self.check(&token::OpenDelim(token::Paren)) {
            self.parse_ty_tuple_or_parens(lo, allow_plus)?
        } else if self.eat(&token::Not) {
            // Never type `!`
            TyKind::Never
        } else if self.eat(&token::BinOp(token::Star)) {
            self.parse_ty_ptr()?
        } else if self.eat(&token::OpenDelim(token::Bracket)) {
            self.parse_array_or_slice_ty()?
        } else if self.check(&token::BinOp(token::And)) || self.check(&token::AndAnd) {
            // Reference
            self.expect_and()?;
            self.parse_borrowed_pointee()?
        } else if self.eat_keyword_noexpect(kw::Typeof) {
            self.parse_typeof_ty()?
        } else if self.eat_keyword(kw::Underscore) {
            // A type to be inferred `_`
            TyKind::Infer
        } else if self.check_fn_front_matter() {
            // Function pointer type
            self.parse_ty_bare_fn(lo, Vec::new())?
        } else if self.check_keyword(kw::For) {
            // Function pointer type or bound list (trait object type) starting with a poly-trait.
            //   `for<'lt> [unsafe] [extern "ABI"] fn (&'lt S) -> T`
            //   `for<'lt> Trait1<'lt> + Trait2 + 'a`
            let lifetime_defs = self.parse_late_bound_lifetime_defs()?;
            if self.check_fn_front_matter() {
                self.parse_ty_bare_fn(lo, lifetime_defs)?
            } else {
                let path = self.parse_path(PathStyle::Type)?;
                let parse_plus = allow_plus == AllowPlus::Yes && self.check_plus();
                self.parse_remaining_bounds_path(lifetime_defs, path, lo, parse_plus)?
            }
        } else if self.eat_keyword(kw::Impl) {
            self.parse_impl_ty(&mut impl_dyn_multi)?
        } else if self.is_explicit_dyn_type() {
            self.parse_dyn_ty(&mut impl_dyn_multi)?
        } else if self.eat_lt() {
            // Qualified path
            let (qself, path) = self.parse_qpath(PathStyle::Type)?;
            TyKind::Path(Some(qself), path)
        } else if self.check_path() {
            self.parse_path_start_ty(lo, allow_plus)?
        } else if self.can_begin_bound() {
            self.parse_bare_trait_object(lo, allow_plus)?
        } else if self.eat(&token::DotDotDot) {
            if allow_c_variadic == AllowCVariadic::Yes {
                TyKind::CVarArgs
            } else {
                // FIXME(Centril): Should we just allow `...` syntactically
                // anywhere in a type and use semantic restrictions instead?
                self.error_illegal_c_varadic_ty(lo);
                TyKind::Err
            }
        } else {
            let msg = format!("expected type, found {}", super::token_descr(&self.token));
            let mut err = self.struct_span_err(self.token.span, &msg);
            err.span_label(self.token.span, "expected type");
            self.maybe_annotate_with_ascription(&mut err, true);
            return Err(err);
        };

        let span = lo.to(self.prev_token.span);
        let ty = self.mk_ty(span, kind);

        // Try to recover from use of `+` with incorrect priority.
        self.maybe_report_ambiguous_plus(allow_plus, impl_dyn_multi, &ty);
        self.maybe_recover_from_bad_type_plus(allow_plus, &ty)?;
        self.maybe_recover_from_bad_qpath(ty, allow_qpath_recovery)
    }

    /// Parses either:
    /// - `(TYPE)`, a parenthesized type.
    /// - `(TYPE,)`, a tuple with a single field of type TYPE.
    fn parse_ty_tuple_or_parens(&mut self, lo: Span, allow_plus: AllowPlus) -> PResult<'a, TyKind> {
        let mut trailing_plus = false;
        let (ts, trailing) = self.parse_paren_comma_seq(|p| {
            let ty = p.parse_ty()?;
            trailing_plus = p.prev_token.kind == TokenKind::BinOp(token::Plus);
            Ok(ty)
        })?;

        if ts.len() == 1 && !trailing {
            // Exactly one element and no trailing comma: parenthesized type,
            // possibly continued by `+ BOUND + ...`.
            let ty = ts.into_iter().next().unwrap().into_inner();
            let maybe_bounds = allow_plus == AllowPlus::Yes && self.token.is_like_plus();
            match ty.kind {
                // `(TY_BOUND_NOPAREN) + BOUND + ...`.
                TyKind::Path(None, path) if maybe_bounds => {
                    self.parse_remaining_bounds_path(Vec::new(), path, lo, true)
                }
                TyKind::TraitObject(bounds, TraitObjectSyntax::None)
                    if maybe_bounds && bounds.len() == 1 && !trailing_plus =>
                {
                    self.parse_remaining_bounds(bounds, true)
                }
                // `(TYPE)`
                _ => Ok(TyKind::Paren(P(ty))),
            }
        } else {
            Ok(TyKind::Tup(ts))
        }
    }

    /// Parses a bare (no `dyn`) trait object type starting with a bound.
    fn parse_bare_trait_object(&mut self, lo: Span, allow_plus: AllowPlus) -> PResult<'a, TyKind> {
        let lt_no_plus = self.check_lifetime() && !self.look_ahead(1, |t| t.is_like_plus());
        let bounds = self.parse_generic_bounds_common(allow_plus, None)?;
        if lt_no_plus {
            self.struct_span_err(lo, "lifetime in trait object type must be followed by `+`").emit()
        }
        Ok(TyKind::TraitObject(bounds, TraitObjectSyntax::None))
    }

    /// Wraps an already-parsed path as the first trait bound and continues with
    /// `parse_remaining_bounds`.
    fn parse_remaining_bounds_path(
        &mut self,
        generic_params: Vec<GenericParam>,
        path: ast::Path,
        lo: Span,
        parse_plus: bool,
    ) -> PResult<'a, TyKind> {
        let poly_trait_ref = PolyTraitRef::new(generic_params, path, lo.to(self.prev_token.span));
        let bounds = vec![GenericBound::Trait(poly_trait_ref, TraitBoundModifier::None)];
        self.parse_remaining_bounds(bounds, parse_plus)
    }

    /// Parse the remainder of a bare trait object type given an already parsed list.
    fn parse_remaining_bounds(
        &mut self,
        mut bounds: GenericBounds,
        plus: bool,
    ) -> PResult<'a, TyKind> {
        assert_ne!(self.token, token::Question);
        if plus {
            self.eat_plus(); // `+`, or `+=` gets split and `+` is discarded
            bounds.append(&mut self.parse_generic_bounds(Some(self.prev_token.span))?);
        }
        Ok(TyKind::TraitObject(bounds, TraitObjectSyntax::None))
    }

    /// Parses a raw pointer type: `*[const | mut] $type`.
    fn parse_ty_ptr(&mut self) -> PResult<'a, TyKind> {
        let mutbl = self.parse_const_or_mut().unwrap_or_else(|| {
            let span = self.prev_token.span;
            let msg = "expected mut or const in raw pointer type";
            self.struct_span_err(span, msg)
                .span_label(span, msg)
                .help("use `*mut T` or `*const T` as appropriate")
                .emit();
            // Recover by assuming `*const` so parsing can continue.
            Mutability::Not
        });
        let ty = self.parse_ty_no_plus()?;
        Ok(TyKind::Ptr(MutTy { ty, mutbl }))
    }

    /// Parses an array (`[TYPE; EXPR]`) or slice (`[TYPE]`) type.
    /// The opening `[` bracket is already eaten.
    fn parse_array_or_slice_ty(&mut self) -> PResult<'a, TyKind> {
        let elt_ty = self.parse_ty()?;
        let ty = if self.eat(&token::Semi) {
            TyKind::Array(elt_ty, self.parse_anon_const_expr()?)
        } else {
            TyKind::Slice(elt_ty)
        };
        self.expect(&token::CloseDelim(token::Bracket))?;
        Ok(ty)
    }

    /// Parses the pointee of a reference type: optional lifetime, mutability, type.
    /// The leading `&` is already eaten.
    fn parse_borrowed_pointee(&mut self) -> PResult<'a, TyKind> {
        let opt_lifetime = if self.check_lifetime() { Some(self.expect_lifetime()) } else { None };
        let mutbl = self.parse_mutability();
        let ty = self.parse_ty_no_plus()?;
        Ok(TyKind::Rptr(opt_lifetime, MutTy { ty, mutbl }))
    }

    // Parses the `typeof(EXPR)`.
    // To avoid ambiguity, the type is surrounded by parentheses.
    fn parse_typeof_ty(&mut self) -> PResult<'a, TyKind> {
        self.expect(&token::OpenDelim(token::Paren))?;
        let expr = self.parse_anon_const_expr()?;
        self.expect(&token::CloseDelim(token::Paren))?;
        Ok(TyKind::Typeof(expr))
    }

    /// Parses a function pointer type (`TyKind::BareFn`).
    /// ```
    /// [unsafe] [extern "ABI"] fn (S) -> T
    /// ^~~~~~^  ^~~~~~~~~~~~^     ^~^    ^
    ///    |           |            |     |
    ///    |           |            |  Return type
    /// Function Style    ABI  Parameter types
    /// ```
    /// We actually parse `FnHeader FnDecl`, but we error on `const` and `async` qualifiers.
    fn parse_ty_bare_fn(&mut self, lo: Span, params: Vec<GenericParam>) -> PResult<'a, TyKind> {
        let ast::FnHeader { ext, unsafety, constness, asyncness } = self.parse_fn_front_matter()?;
        let decl = self.parse_fn_decl(|_| false, AllowPlus::No)?;
        let whole_span = lo.to(self.prev_token.span);
        if let ast::Const::Yes(span) = constness {
            self.error_fn_ptr_bad_qualifier(whole_span, span, "const");
        }
        if let ast::Async::Yes { span, .. } = asyncness {
            self.error_fn_ptr_bad_qualifier(whole_span, span, "async");
        }
        Ok(TyKind::BareFn(P(BareFnTy { ext, unsafety, generic_params: params, decl })))
    }

    /// Emit an error for the given bad function pointer qualifier.
    fn error_fn_ptr_bad_qualifier(&self, span: Span, qual_span: Span, qual: &str) {
        self.struct_span_err(span, &format!("an `fn` pointer type cannot be `{}`", qual))
            .span_label(qual_span, format!("`{}` because of this", qual))
            .span_suggestion_short(
                qual_span,
                &format!("remove the `{}` qualifier", qual),
                String::new(),
                Applicability::MaybeIncorrect,
            )
            .emit();
    }

    /// Parses an `impl B0 + ... + Bn` type.
    fn parse_impl_ty(&mut self, impl_dyn_multi: &mut bool) -> PResult<'a, TyKind> {
        // Always parse bounds greedily for better error recovery.
        let bounds = self.parse_generic_bounds(None)?;
        *impl_dyn_multi = bounds.len() > 1 || self.prev_token.kind == TokenKind::BinOp(token::Plus);
        Ok(TyKind::ImplTrait(ast::DUMMY_NODE_ID, bounds))
    }

    /// Is a `dyn B0 + ... + Bn` type allowed here?
    fn is_explicit_dyn_type(&mut self) -> bool {
        self.check_keyword(kw::Dyn)
            && (self.token.uninterpolated_span().rust_2018()
                // In 2015, `dyn` is only a keyword when followed by something
                // that can begin a bound (and not continue a plain path type).
                || self.look_ahead(1, |t| {
                    t.can_begin_bound() && !can_continue_type_after_non_fn_ident(t)
                }))
    }

    /// Parses a `dyn B0 + ... + Bn` type.
    ///
    /// Note that this does *not* parse bare trait objects.
    fn parse_dyn_ty(&mut self, impl_dyn_multi: &mut bool) -> PResult<'a, TyKind> {
        self.bump(); // `dyn`
        // Always parse bounds greedily for better error recovery.
        let bounds = self.parse_generic_bounds(None)?;
        *impl_dyn_multi = bounds.len() > 1 || self.prev_token.kind == TokenKind::BinOp(token::Plus);
        Ok(TyKind::TraitObject(bounds, TraitObjectSyntax::Dyn))
    }

    /// Parses a type starting with a path.
    ///
    /// This can be:
    /// 1. a type macro, `mac!(...)`,
    /// 2. a bare trait object, `B0 + ... + Bn`,
    /// 3. or a path, `path::to::MyType`.
    fn parse_path_start_ty(&mut self, lo: Span, allow_plus: AllowPlus) -> PResult<'a, TyKind> {
        // Simple path
        let path = self.parse_path(PathStyle::Type)?;
        if self.eat(&token::Not) {
            // Macro invocation in type position
            Ok(TyKind::MacCall(MacCall {
                path,
                args: self.parse_mac_args()?,
                prior_type_ascription: self.last_type_ascription,
            }))
        } else if allow_plus == AllowPlus::Yes && self.check_plus() {
            // `Trait1 + Trait2 + 'a`
            self.parse_remaining_bounds_path(Vec::new(), path, lo, true)
        } else {
            // Just a type path.
            Ok(TyKind::Path(None, path))
        }
    }

    /// Emits E0743 for `...` used where C-variadic arguments are not allowed.
    fn error_illegal_c_varadic_ty(&self, lo: Span) {
        struct_span_err!(
            self.sess.span_diagnostic,
            lo.to(self.prev_token.span),
            E0743,
            "C-variadic type `...` may not be nested inside another type",
        )
        .emit();
    }

    pub(super) fn parse_generic_bounds(
        &mut self,
        colon_span: Option<Span>,
    ) -> PResult<'a, GenericBounds> {
        self.parse_generic_bounds_common(AllowPlus::Yes, colon_span)
    }

    /// Parses bounds of a type parameter `BOUND + BOUND + ...`, possibly with trailing `+`.
    ///
    /// See `parse_generic_bound` for the `BOUND` grammar.
    fn parse_generic_bounds_common(
        &mut self,
        allow_plus: AllowPlus,
        colon_span: Option<Span>,
    ) -> PResult<'a, GenericBounds> {
        let mut bounds = Vec::new();
        let mut negative_bounds = Vec::new();
        while self.can_begin_bound() {
            match self.parse_generic_bound()? {
                Ok(bound) => bounds.push(bound),
                // `Err` carries the span of a `!Trait` negative bound.
                Err(neg_sp) => negative_bounds.push(neg_sp),
            }
            if allow_plus == AllowPlus::No || !self.eat_plus() {
                break;
            }
        }

        if !negative_bounds.is_empty() {
            self.error_negative_bounds(colon_span, &bounds, negative_bounds);
        }

        Ok(bounds)
    }

    /// Can the current token begin a bound?
    fn can_begin_bound(&mut self) -> bool {
        // This needs to be synchronized with `TokenKind::can_begin_bound`.
        self.check_path()
            || self.check_lifetime()
            || self.check(&token::Not) // Used for error reporting only.
            || self.check(&token::Question)
            || self.check_keyword(kw::For)
            || self.check(&token::OpenDelim(token::Paren))
    }

    /// Emits "negative bounds are not supported", with a tool-only suggestion
    /// that removes the negative bounds from the bound list.
    fn error_negative_bounds(
        &self,
        colon_span: Option<Span>,
        bounds: &[GenericBound],
        negative_bounds: Vec<Span>,
    ) {
        let negative_bounds_len = negative_bounds.len();
        let last_span = *negative_bounds.last().expect("no negative bounds, but still error?");
        let mut err = self.struct_span_err(negative_bounds, "negative bounds are not supported");
        err.span_label(last_span, "negative bounds are not supported");
        if let Some(bound_list) = colon_span {
            let bound_list = bound_list.to(self.prev_token.span);
            let mut new_bound_list = String::new();
            if !bounds.is_empty() {
                let mut snippets = bounds.iter().map(|bound| self.span_to_snippet(bound.span()));
                while let Some(Ok(snippet)) = snippets.next() {
                    new_bound_list.push_str(" + ");
                    new_bound_list.push_str(&snippet);
                }
                // Turn the leading ` +` into `:` so the replacement reads `: A + B`.
                new_bound_list = new_bound_list.replacen(" +", ":", 1);
            }
            err.tool_only_span_suggestion(
                bound_list,
                &format!("remove the bound{}", pluralize!(negative_bounds_len)),
                new_bound_list,
                Applicability::MachineApplicable,
            );
        }
        err.emit();
    }

    /// Parses a bound according to the grammar:
    /// ```
    /// BOUND = TY_BOUND | LT_BOUND
    /// ```
    fn parse_generic_bound(&mut self) -> PResult<'a, Result<GenericBound, Span>> {
        let anchor_lo = self.prev_token.span;
        let lo = self.token.span;
        let has_parens = self.eat(&token::OpenDelim(token::Paren));
        let inner_lo = self.token.span;
        let is_negative = self.eat(&token::Not);

        let modifiers = self.parse_ty_bound_modifiers();
        let bound = if self.token.is_lifetime() {
            self.error_lt_bound_with_modifiers(modifiers);
            self.parse_generic_lt_bound(lo, inner_lo, has_parens)?
        } else {
            self.parse_generic_ty_bound(lo, has_parens, modifiers)?
        };

        Ok(if is_negative { Err(anchor_lo.to(self.prev_token.span)) } else { Ok(bound) })
    }

    /// Parses a lifetime ("outlives") bound, e.g. `'a`, according to:
    /// ```
    /// LT_BOUND = LIFETIME
    /// ```
    fn parse_generic_lt_bound(
        &mut self,
        lo: Span,
        inner_lo: Span,
        has_parens: bool,
    ) -> PResult<'a, GenericBound> {
        let bound = GenericBound::Outlives(self.expect_lifetime());
        if has_parens {
            // FIXME(Centril): Consider not erroring here and accepting `('lt)` instead,
            // possibly introducing `GenericBound::Paren(P<GenericBound>)`?
            self.recover_paren_lifetime(lo, inner_lo)?;
        }
        Ok(bound)
    }

    /// Emits an error if any trait bound modifiers were present.
    fn error_lt_bound_with_modifiers(&self, modifiers: BoundModifiers) {
        if let Some(span) = modifiers.maybe_const {
            self.struct_span_err(
                span,
                "`?const` may only modify trait bounds, not lifetime bounds",
            )
            .emit();
        }

        if let Some(span) = modifiers.maybe {
            self.struct_span_err(span, "`?` may only modify trait bounds, not lifetime bounds")
                .emit();
        }
    }

    /// Recover on `('lifetime)` with `(` already eaten.
    fn recover_paren_lifetime(&mut self, lo: Span, inner_lo: Span) -> PResult<'a, ()> {
        let inner_span = inner_lo.to(self.prev_token.span);
        self.expect(&token::CloseDelim(token::Paren))?;
        let mut err = self.struct_span_err(
            lo.to(self.prev_token.span),
            "parenthesized lifetime bounds are not supported",
        );
        if let Ok(snippet) = self.span_to_snippet(inner_span) {
            err.span_suggestion_short(
                lo.to(self.prev_token.span),
                "remove the parentheses",
                snippet,
                Applicability::MachineApplicable,
            );
        }
        err.emit();
        Ok(())
    }

    /// Parses the modifiers that may precede a trait in a bound, e.g. `?Trait` or `?const Trait`.
    ///
    /// If no modifiers are present, this does not consume any tokens.
    ///
    /// ```
    /// TY_BOUND_MODIFIERS = "?" ["const" ["?"]]
    /// ```
    fn parse_ty_bound_modifiers(&mut self) -> BoundModifiers {
        if !self.eat(&token::Question) {
            return BoundModifiers { maybe: None, maybe_const: None };
        }

        // `? ...`
        let first_question = self.prev_token.span;
        if !self.eat_keyword(kw::Const) {
            return BoundModifiers { maybe: Some(first_question), maybe_const: None };
        }

        // `?const ...` -- feature-gated.
        let maybe_const = first_question.to(self.prev_token.span);
        self.sess.gated_spans.gate(sym::const_trait_bound_opt_out, maybe_const);
        if !self.eat(&token::Question) {
            return BoundModifiers { maybe: None, maybe_const: Some(maybe_const) };
        }

        // `?const ? ...`
        let second_question = self.prev_token.span;
        BoundModifiers { maybe: Some(second_question), maybe_const: Some(maybe_const) }
    }

    /// Parses a type bound according to:
    /// ```
    /// TY_BOUND = TY_BOUND_NOPAREN | (TY_BOUND_NOPAREN)
    /// TY_BOUND_NOPAREN = [TY_BOUND_MODIFIERS] [for<LT_PARAM_DEFS>] SIMPLE_PATH
    /// ```
    ///
    /// For example, this grammar accepts `?const ?for<'a: 'b> m::Trait<'a>`.
    fn parse_generic_ty_bound(
        &mut self,
        lo: Span,
        has_parens: bool,
        modifiers: BoundModifiers,
    ) -> PResult<'a, GenericBound> {
        let lifetime_defs = self.parse_late_bound_lifetime_defs()?;
        let path = self.parse_path(PathStyle::Type)?;
        if has_parens {
            self.expect(&token::CloseDelim(token::Paren))?;
        }

        let modifier = modifiers.to_trait_bound_modifier();
        let poly_trait = PolyTraitRef::new(lifetime_defs, path, lo.to(self.prev_token.span));
        Ok(GenericBound::Trait(poly_trait, modifier))
    }

    /// Optionally parses `for<$generic_params>`.
    pub(super) fn parse_late_bound_lifetime_defs(&mut self) -> PResult<'a, Vec<GenericParam>> {
        if self.eat_keyword(kw::For) {
            self.expect_lt()?;
            let params = self.parse_generic_params()?;
            self.expect_gt()?;
            // We rely on AST validation to rule out invalid cases: There must not be type
            // parameters, and the lifetime parameters must not have bounds.
            Ok(params)
        } else {
            Ok(Vec::new())
        }
    }

    /// Returns `true` if the current token is a lifetime, registering the
    /// expectation for diagnostics.
    pub(super) fn check_lifetime(&mut self) -> bool {
        self.expected_tokens.push(TokenType::Lifetime);
        self.token.is_lifetime()
    }

    /// Parses a single lifetime `'a` or panics.
    pub(super) fn expect_lifetime(&mut self) -> Lifetime {
        if let Some(ident) = self.token.lifetime() {
            self.bump();
            Lifetime { ident, id: ast::DUMMY_NODE_ID }
        } else {
            self.span_bug(self.token.span, "not a lifetime")
        }
    }

    pub(super) fn mk_ty(&self, span: Span, kind: TyKind) -> P<Ty> {
        P(Ty { kind, span, id: ast::DUMMY_NODE_ID })
    }
}
diff --git a/compiler/rustc_parse/src/validate_attr.rs b/compiler/rustc_parse/src/validate_attr.rs
new file mode 100644
index 00000000000..f4bb9610940
--- /dev/null
+++ b/compiler/rustc_parse/src/validate_attr.rs
@@ -0,0 +1,163 @@
//! Meta-syntax validation logic of attributes for post-expansion.
use crate::parse_in;

use rustc_ast::tokenstream::DelimSpan;
use rustc_ast::{self as ast, Attribute, MacArgs, MacDelimiter, MetaItem, MetaItemKind};
use rustc_errors::{Applicability, PResult};
use rustc_feature::{AttributeTemplate, BUILTIN_ATTRIBUTE_MAP};
use rustc_session::lint::builtin::ILL_FORMED_ATTRIBUTE_INPUT;
use rustc_session::parse::ParseSess;
use rustc_span::{sym, Symbol};

/// Checks the meta-item syntax of `attr`, emitting errors/lints as needed.
pub fn check_meta(sess: &ParseSess, attr: &Attribute) {
    // Doc comments are not subject to attribute validation.
    if attr.is_doc_comment() {
        return;
    }

    let attr_info =
        attr.ident().and_then(|ident| BUILTIN_ATTRIBUTE_MAP.get(&ident.name)).map(|a| **a);

    // Check input tokens for built-in and key-value attributes.
    match attr_info {
        // `rustc_dummy` doesn't have any restrictions specific to built-in attributes.
        Some((name, _, template, _)) if name != sym::rustc_dummy => {
            check_builtin_attribute(sess, attr, name, template)
        }
        _ => {
            if let MacArgs::Eq(..) = attr.get_normal_item().args {
                // All key-value attributes are restricted to meta-item syntax.
                parse_meta(sess, attr)
                    .map_err(|mut err| {
                        err.emit();
                    })
                    .ok();
            }
        }
    }
}

/// Lowers an attribute to a `MetaItem`, erroring on invalid meta-item syntax.
pub fn parse_meta<'a>(sess: &'a ParseSess, attr: &Attribute) -> PResult<'a, MetaItem> {
    let item = attr.get_normal_item();
    Ok(MetaItem {
        span: attr.span,
        path: item.path.clone(),
        kind: match &item.args {
            MacArgs::Empty => MetaItemKind::Word,
            MacArgs::Eq(_, t) => {
                let v = parse_in(sess, t.clone(), "name value", |p| p.parse_unsuffixed_lit())?;
                MetaItemKind::NameValue(v)
            }
            MacArgs::Delimited(dspan, delim, t) => {
                check_meta_bad_delim(sess, *dspan, *delim, "wrong meta list delimiters");
                let nmis = parse_in(sess, t.clone(), "meta list", |p| p.parse_meta_seq_top())?;
                MetaItemKind::List(nmis)
            }
        },
    })
}

/// Errors (with a machine-applicable fixup) when a meta list is delimited by
/// anything other than parentheses.
pub fn check_meta_bad_delim(sess: &ParseSess, span: DelimSpan, delim: MacDelimiter, msg: &str) {
    if let ast::MacDelimiter::Parenthesis = delim {
        return;
    }

    sess.span_diagnostic
        .struct_span_err(span.entire(), msg)
        .multipart_suggestion(
            "the delimiters should be `(` and `)`",
            vec![(span.open, "(".to_string()), (span.close, ")".to_string())],
            Applicability::MachineApplicable,
        )
        .emit();
}

/// Checks that the given meta-item is compatible with this `AttributeTemplate`.
fn is_attr_template_compatible(template: &AttributeTemplate, meta: &ast::MetaItemKind) -> bool {
    match meta {
        MetaItemKind::Word => template.word,
        MetaItemKind::List(..) => template.list.is_some(),
        MetaItemKind::NameValue(lit) if lit.kind.is_str() => template.name_value_str.is_some(),
        MetaItemKind::NameValue(..) => false,
    }
}

/// Validates a built-in attribute against its template, emitting either a hard
/// error or (for historically accepted attributes) a buffered lint.
pub fn check_builtin_attribute(
    sess: &ParseSess,
    attr: &Attribute,
    name: Symbol,
    template: AttributeTemplate,
) {
    // Some special attributes like `cfg` must be checked
    // before the generic check, so we skip them here.
    let should_skip = |name| name == sym::cfg;
    // Some of previously accepted forms were used in practice,
    // report them as warnings for now.
    let should_warn = |name| {
        name == sym::doc
            || name == sym::ignore
            || name == sym::inline
            || name == sym::link
            || name == sym::test
            || name == sym::bench
    };

    match parse_meta(sess, attr) {
        Ok(meta) => {
            if !should_skip(name) && !is_attr_template_compatible(&template, &meta.kind) {
                // Build the "attribute must be of the form ..." message, listing
                // every form the template accepts, along with fix suggestions.
                let error_msg = format!("malformed `{}` attribute input", name);
                let mut msg = "attribute must be of the form ".to_owned();
                let mut suggestions = vec![];
                let mut first = true;
                if template.word {
                    first = false;
                    let code = format!("#[{}]", name);
                    msg.push_str(&format!("`{}`", &code));
                    suggestions.push(code);
                }
                if let Some(descr) = template.list {
                    if !first {
                        msg.push_str(" or ");
                    }
                    first = false;
                    let code = format!("#[{}({})]", name, descr);
                    msg.push_str(&format!("`{}`", &code));
                    suggestions.push(code);
                }
                if let Some(descr) = template.name_value_str {
                    if !first {
                        msg.push_str(" or ");
                    }
                    let code = format!("#[{} = \"{}\"]", name, descr);
                    msg.push_str(&format!("`{}`", &code));
                    suggestions.push(code);
                }
                if should_warn(name) {
                    sess.buffer_lint(
                        &ILL_FORMED_ATTRIBUTE_INPUT,
                        meta.span,
                        ast::CRATE_NODE_ID,
                        &msg,
                    );
                } else {
                    sess.span_diagnostic
                        .struct_span_err(meta.span, &error_msg)
                        .span_suggestions(
                            meta.span,
                            if suggestions.len() == 1 {
                                "must be of the form"
                            } else {
                                "the following are the possible correct uses"
                            },
                            suggestions.into_iter(),
                            Applicability::HasPlaceholders,
                        )
                        .emit();
                }
            }
        }
        Err(mut err) => {
            err.emit();
        }
    }
}
