diff options
| author | Sayan Nandan <17377258+sntdevco@users.noreply.github.com> | 2019-08-09 13:01:05 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-08-09 13:01:05 +0530 |
| commit | fb3a01354ffecc41d7a189e4dd225d706387a522 (patch) | |
| tree | 41492dfe93f1dccba847dadb56ac6aa079edaaa9 /src/libsyntax/parse | |
| parent | 33445aea509cadcd715009c79795d289268daa7c (diff) | |
| parent | 5aa3d9a7b5d3a46a7f158e8881146331a6bc9243 (diff) | |
| download | rust-fb3a01354ffecc41d7a189e4dd225d706387a522.tar.gz rust-fb3a01354ffecc41d7a189e4dd225d706387a522.zip | |
Merge pull request #1 from rust-lang/master
Merge recent changes into master
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/attr.rs | 114 | ||||
| -rw-r--r-- | src/libsyntax/parse/classify.rs | 15 | ||||
| -rw-r--r-- | src/libsyntax/parse/diagnostics.rs | 1374 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/comments.rs | 370 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/comments/tests.rs | 47 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 2480 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/tests.rs | 255 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/tokentrees.rs | 107 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/unicode_chars.rs | 171 | ||||
| -rw-r--r-- | src/libsyntax/parse/literal.rs | 470 | ||||
| -rw-r--r-- | src/libsyntax/parse/mod.rs | 873 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 4384 | ||||
| -rw-r--r-- | src/libsyntax/parse/tests.rs | 339 | ||||
| -rw-r--r-- | src/libsyntax/parse/token.rs | 431 | ||||
| -rw-r--r-- | src/libsyntax/parse/unescape_error_reporting.rs | 209 |
15 files changed, 5703 insertions, 5936 deletions
diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs index e93e15f9012..a42da112360 100644 --- a/src/libsyntax/parse/attr.rs +++ b/src/libsyntax/parse/attr.rs @@ -1,30 +1,39 @@ use crate::attr; use crate::ast; -use crate::source_map::respan; use crate::parse::{SeqSep, PResult}; use crate::parse::token::{self, Nonterminal, DelimToken}; use crate::parse::parser::{Parser, TokenType, PathStyle}; use crate::tokenstream::{TokenStream, TokenTree}; +use crate::source_map::Span; use log::debug; +use smallvec::smallvec; #[derive(Debug)] enum InnerAttributeParsePolicy<'a> { Permitted, - NotPermitted { reason: &'a str }, + NotPermitted { reason: &'a str, saw_doc_comment: bool, prev_attr_sp: Option<Span> }, } const DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG: &str = "an inner attribute is not \ permitted in this context"; impl<'a> Parser<'a> { + crate fn parse_arg_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> { + let attrs = self.parse_outer_attributes()?; + attrs.iter().for_each(|a| + self.sess.param_attr_spans.borrow_mut().push(a.span) + ); + Ok(attrs) + } + /// Parse attributes that appear before an item crate fn parse_outer_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> { let mut attrs: Vec<ast::Attribute> = Vec::new(); let mut just_parsed_doc_comment = false; loop { debug!("parse_outer_attributes: self.token={:?}", self.token); - match self.token { + match self.token.kind { token::Pound => { let inner_error_reason = if just_parsed_doc_comment { "an inner attribute is not permitted following an outer doc comment" @@ -34,12 +43,17 @@ impl<'a> Parser<'a> { DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG }; let inner_parse_policy = - InnerAttributeParsePolicy::NotPermitted { reason: inner_error_reason }; - attrs.push(self.parse_attribute_with_inner_parse_policy(inner_parse_policy)?); + InnerAttributeParsePolicy::NotPermitted { + reason: inner_error_reason, + saw_doc_comment: just_parsed_doc_comment, + prev_attr_sp: attrs.last().and_then(|a| Some(a.span)) + }; + let attr = self.parse_attribute_with_inner_parse_policy(inner_parse_policy)?; + attrs.push(attr); just_parsed_doc_comment = false; } token::DocComment(s) => { - let attr = attr::mk_sugared_doc_attr(attr::mk_attr_id(), s, self.span); + let attr = attr::mk_sugared_doc_attr(s, self.token.span); if attr.style != ast::AttrStyle::Outer { let mut err = self.fatal("expected outer doc comment"); err.note("inner doc comments like this (starting with \ @@ -67,8 +81,11 @@ impl<'a> Parser<'a> { let inner_parse_policy = if permit_inner { InnerAttributeParsePolicy::Permitted } else { - InnerAttributeParsePolicy::NotPermitted - { reason: DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG } + InnerAttributeParsePolicy::NotPermitted { + reason: DEFAULT_UNEXPECTED_INNER_ATTR_ERR_MSG, + saw_doc_comment: false, + prev_attr_sp: None + } }; self.parse_attribute_with_inner_parse_policy(inner_parse_policy) } @@ -81,27 +98,17 @@ impl<'a> Parser<'a> { debug!("parse_attribute_with_inner_parse_policy: inner_parse_policy={:?} self.token={:?}", inner_parse_policy, self.token); - let (span, path, tokens, style) = match self.token { + let (span, path, tokens, style) = match self.token.kind { token::Pound => { - let lo = self.span; + let lo = self.token.span; self.bump(); if let InnerAttributeParsePolicy::Permitted = inner_parse_policy { self.expected_tokens.push(TokenType::Token(token::Not)); } + let style = if self.token == token::Not { self.bump(); - if let InnerAttributeParsePolicy::NotPermitted { reason } = inner_parse_policy - { - let span = self.span; - self.diagnostic() - .struct_span_err(span, reason) - .note("inner attributes, like `#![no_std]`, annotate the item \ - enclosing them, and are usually found at the beginning of \ - source files. Outer attributes, like `#[test]`, annotate the \ - item following them.") - .emit() - } ast::AttrStyle::Inner } else { ast::AttrStyle::Outer @@ -112,7 +119,38 @@ impl<'a> Parser<'a> { self.expect(&token::CloseDelim(token::Bracket))?; let hi = self.prev_span; - (lo.to(hi), path, tokens, style) + let attr_sp = lo.to(hi); + + // Emit error if inner attribute is encountered and not permitted + if style == ast::AttrStyle::Inner { + if let InnerAttributeParsePolicy::NotPermitted { reason, + saw_doc_comment, prev_attr_sp } = inner_parse_policy { + let prev_attr_note = if saw_doc_comment { + "previous doc comment" + } else { + "previous outer attribute" + }; + + let mut diagnostic = self + .diagnostic() + .struct_span_err(attr_sp, reason); + + if let Some(prev_attr_sp) = prev_attr_sp { + diagnostic + .span_label(attr_sp, "not permitted following an outer attibute") + .span_label(prev_attr_sp, prev_attr_note); + } + + diagnostic + .note("inner attributes, like `#![no_std]`, annotate the item \ + enclosing them, and are usually found at the beginning of \ + source files. Outer attributes, like `#[test]`, annotate the \ + item following them.") + .emit() + } + } + + (attr_sp, path, tokens, style) } _ => { let token_str = self.this_token_to_string(); @@ -140,7 +178,7 @@ impl<'a> Parser<'a> { /// PATH `=` TOKEN_TREE /// The delimiters or `=` are still put into the resulting token stream. crate fn parse_meta_item_unrestricted(&mut self) -> PResult<'a, (ast::Path, TokenStream)> { - let meta = match self.token { + let meta = match self.token.kind { token::Interpolated(ref nt) => match **nt { Nonterminal::NtMeta(ref meta) => Some(meta.clone()), _ => None, @@ -149,7 +187,7 @@ impl<'a> Parser<'a> { }; Ok(if let Some(meta) = meta { self.bump(); - (meta.ident, meta.node.tokens(meta.span)) + (meta.path, meta.node.tokens(meta.span)) } else { let path = self.parse_path(PathStyle::Mod)?; let tokens = if self.check(&token::OpenDelim(DelimToken::Paren)) || @@ -157,9 +195,9 @@ impl<'a> Parser<'a> { self.check(&token::OpenDelim(DelimToken::Brace)) { self.parse_token_tree().into() } else if self.eat(&token::Eq) { - let eq = TokenTree::Token(self.prev_span, token::Eq); + let eq = TokenTree::token(token::Eq, self.prev_span); let mut is_interpolated_expr = false; - if let token::Interpolated(nt) = &self.token { + if let token::Interpolated(nt) = &self.token.kind { if let token::NtExpr(..) = **nt { is_interpolated_expr = true; } @@ -172,7 +210,7 @@ impl<'a> Parser<'a> { } else { self.parse_unsuffixed_lit()?.tokens() }; - TokenStream::from_streams(vec![eq.into(), tokens]) + TokenStream::from_streams(smallvec![eq.into(), tokens]) } else { TokenStream::empty() }; @@ -188,7 +226,7 @@ impl<'a> Parser<'a> { crate fn parse_inner_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> { let mut attrs: Vec<ast::Attribute> = vec![]; loop { - match self.token { + match self.token.kind { token::Pound => { // Don't even try to parse if it's not an inner attribute. if !self.look_ahead(1, |t| t == &token::Not) { @@ -201,7 +239,7 @@ impl<'a> Parser<'a> { } token::DocComment(s) => { // we need to get the position of this token before we bump. - let attr = attr::mk_sugared_doc_attr(attr::mk_attr_id(), s, self.span); + let attr = attr::mk_sugared_doc_attr(s, self.token.span); if attr.style == ast::AttrStyle::Inner { attrs.push(attr); self.bump(); @@ -217,7 +255,7 @@ impl<'a> Parser<'a> { fn parse_unsuffixed_lit(&mut self) -> PResult<'a, ast::Lit> { let lit = self.parse_lit()?; - debug!("Checking if {:?} is unusuffixed.", lit); + debug!("checking if {:?} is unusuffixed", lit); if !lit.node.is_unsuffixed() { let msg = "suffixed literals are not allowed in attributes"; @@ -236,7 +274,7 @@ impl<'a> Parser<'a> { /// meta_item : IDENT ( '=' UNSUFFIXED_LIT | '(' meta_item_inner? ')' )? ; /// meta_item_inner : (meta_item | UNSUFFIXED_LIT) (',' meta_item_inner)? ; pub fn parse_meta_item(&mut self) -> PResult<'a, ast::MetaItem> { - let nt_meta = match self.token { + let nt_meta = match self.token.kind { token::Interpolated(ref nt) => match **nt { token::NtMeta(ref e) => Some(e.clone()), _ => None, @@ -249,11 +287,11 @@ impl<'a> Parser<'a> { return Ok(meta); } - let lo = self.span; - let ident = self.parse_path(PathStyle::Mod)?; + let lo = self.token.span; + let path = self.parse_path(PathStyle::Mod)?; let node = self.parse_meta_item_kind()?; let span = lo.to(self.prev_span); - Ok(ast::MetaItem { ident, node, span }) + Ok(ast::MetaItem { path, node, span }) } crate fn parse_meta_item_kind(&mut self) -> PResult<'a, ast::MetaItemKind> { @@ -268,25 +306,23 @@ impl<'a> Parser<'a> { /// matches meta_item_inner : (meta_item | UNSUFFIXED_LIT) ; fn parse_meta_item_inner(&mut self) -> PResult<'a, ast::NestedMetaItem> { - let lo = self.span; - match self.parse_unsuffixed_lit() { Ok(lit) => { - return Ok(respan(lo.to(self.prev_span), ast::NestedMetaItemKind::Literal(lit))) + return Ok(ast::NestedMetaItem::Literal(lit)) } Err(ref mut err) => self.diagnostic().cancel(err) } match self.parse_meta_item() { Ok(mi) => { - return Ok(respan(lo.to(self.prev_span), ast::NestedMetaItemKind::MetaItem(mi))) + return Ok(ast::NestedMetaItem::MetaItem(mi)) } Err(ref mut err) => self.diagnostic().cancel(err) } let found = self.this_token_to_string(); let msg = format!("expected unsuffixed literal or identifier, found `{}`", found); - Err(self.diagnostic().struct_span_err(self.span, &msg)) + Err(self.diagnostic().struct_span_err(self.token.span, &msg)) } /// matches meta_seq = ( COMMASEP(meta_item_inner) ) diff --git a/src/libsyntax/parse/classify.rs b/src/libsyntax/parse/classify.rs index b4103440e35..6ebfab3a133 100644 --- a/src/libsyntax/parse/classify.rs +++ b/src/libsyntax/parse/classify.rs @@ -14,27 +14,12 @@ use crate::ast; pub fn expr_requires_semi_to_be_stmt(e: &ast::Expr) -> bool { match e.node { ast::ExprKind::If(..) | - ast::ExprKind::IfLet(..) | ast::ExprKind::Match(..) | ast::ExprKind::Block(..) | ast::ExprKind::While(..) | - ast::ExprKind::WhileLet(..) | ast::ExprKind::Loop(..) | ast::ExprKind::ForLoop(..) | ast::ExprKind::TryBlock(..) => false, _ => true, } } - -/// this statement requires a semicolon after it. -/// note that in one case (`stmt_semi`), we've already -/// seen the semicolon, and thus don't need another. -pub fn stmt_ends_with_semi(stmt: &ast::StmtKind) -> bool { - match *stmt { - ast::StmtKind::Local(_) => true, - ast::StmtKind::Expr(ref e) => expr_requires_semi_to_be_stmt(e), - ast::StmtKind::Item(_) | - ast::StmtKind::Semi(..) | - ast::StmtKind::Mac(..) => false, - } -} diff --git a/src/libsyntax/parse/diagnostics.rs b/src/libsyntax/parse/diagnostics.rs new file mode 100644 index 00000000000..730efb5ef01 --- /dev/null +++ b/src/libsyntax/parse/diagnostics.rs @@ -0,0 +1,1374 @@ +use crate::ast::{ + self, Arg, BinOpKind, BindingMode, BlockCheckMode, Expr, ExprKind, Ident, Item, ItemKind, + Mutability, Pat, PatKind, PathSegment, QSelf, Ty, TyKind, VariantData, +}; +use crate::feature_gate::{feature_err, UnstableFeatures}; +use crate::parse::{SeqSep, PResult, Parser, ParseSess}; +use crate::parse::parser::{BlockMode, PathStyle, SemiColonMode, TokenType, TokenExpectType}; +use crate::parse::token::{self, TokenKind}; +use crate::print::pprust; +use crate::ptr::P; +use crate::source_map::Spanned; +use crate::symbol::{kw, sym}; +use crate::ThinVec; +use crate::util::parser::AssocOp; +use errors::{Applicability, DiagnosticBuilder, DiagnosticId}; +use rustc_data_structures::fx::FxHashSet; +use syntax_pos::{Span, DUMMY_SP, MultiSpan, SpanSnippetError}; +use log::{debug, trace}; +use std::mem; + +/// Creates a placeholder argument. +crate fn dummy_arg(ident: Ident) -> Arg { + let pat = P(Pat { + id: ast::DUMMY_NODE_ID, + node: PatKind::Ident(BindingMode::ByValue(Mutability::Immutable), ident, None), + span: ident.span, + }); + let ty = Ty { + node: TyKind::Err, + span: ident.span, + id: ast::DUMMY_NODE_ID + }; + Arg { attrs: ThinVec::default(), id: ast::DUMMY_NODE_ID, pat, span: ident.span, ty: P(ty) } +} + +pub enum Error { + FileNotFoundForModule { + mod_name: String, + default_path: String, + secondary_path: String, + dir_path: String, + }, + DuplicatePaths { + mod_name: String, + default_path: String, + secondary_path: String, + }, + UselessDocComment, + InclusiveRangeWithNoEnd, +} + +impl Error { + fn span_err<S: Into<MultiSpan>>( + self, + sp: S, + handler: &errors::Handler, + ) -> DiagnosticBuilder<'_> { + match self { + Error::FileNotFoundForModule { + ref mod_name, + ref default_path, + ref secondary_path, + ref dir_path, + } => { + let mut err = struct_span_err!( + handler, + sp, + E0583, + "file not found for module `{}`", + mod_name, + ); + err.help(&format!( + "name the file either {} or {} inside the directory \"{}\"", + default_path, + secondary_path, + dir_path, + )); + err + } + Error::DuplicatePaths { ref mod_name, ref default_path, ref secondary_path } => { + let mut err = struct_span_err!( + handler, + sp, + E0584, + "file for module `{}` found at both {} and {}", + mod_name, + default_path, + secondary_path, + ); + err.help("delete or rename one of them to remove the ambiguity"); + err + } + Error::UselessDocComment => { + let mut err = struct_span_err!( + handler, + sp, + E0585, + "found a documentation comment that doesn't document anything", + ); + err.help("doc comments must come before what they document, maybe a comment was \ + intended with `//`?"); + err + } + Error::InclusiveRangeWithNoEnd => { + let mut err = struct_span_err!( + handler, + sp, + E0586, + "inclusive range with no end", + ); + err.help("inclusive ranges must be bounded at the end (`..=b` or `a..=b`)"); + err + } + } + } +} + +pub trait RecoverQPath: Sized + 'static { + const PATH_STYLE: PathStyle = PathStyle::Expr; + fn to_ty(&self) -> Option<P<Ty>>; + fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self; +} + +impl RecoverQPath for Ty { + const PATH_STYLE: PathStyle = PathStyle::Type; + fn to_ty(&self) -> Option<P<Ty>> { + Some(P(self.clone())) + } + fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self { + Self { + span: path.span, + node: TyKind::Path(qself, path), + id: ast::DUMMY_NODE_ID, + } + } +} + +impl RecoverQPath for Pat { + fn to_ty(&self) -> Option<P<Ty>> { + self.to_ty() + } + fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self { + Self { + span: path.span, + node: PatKind::Path(qself, path), + id: ast::DUMMY_NODE_ID, + } + } +} + +impl RecoverQPath for Expr { + fn to_ty(&self) -> Option<P<Ty>> { + self.to_ty() + } + fn recovered(qself: Option<QSelf>, path: ast::Path) -> Self { + Self { + span: path.span, + node: ExprKind::Path(qself, path), + attrs: ThinVec::new(), + id: ast::DUMMY_NODE_ID, + } + } +} + +impl<'a> Parser<'a> { + pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> { + self.span_fatal(self.token.span, m) + } + + pub fn span_fatal<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> DiagnosticBuilder<'a> { + self.sess.span_diagnostic.struct_span_fatal(sp, m) + } + + pub fn span_fatal_err<S: Into<MultiSpan>>(&self, sp: S, err: Error) -> DiagnosticBuilder<'a> { + err.span_err(sp, self.diagnostic()) + } + + pub fn bug(&self, m: &str) -> ! { + self.sess.span_diagnostic.span_bug(self.token.span, m) + } + + pub fn span_err<S: Into<MultiSpan>>(&self, sp: S, m: &str) { + self.sess.span_diagnostic.span_err(sp, m) + } + + crate fn struct_span_err<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> DiagnosticBuilder<'a> { + self.sess.span_diagnostic.struct_span_err(sp, m) + } + + crate fn span_bug<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> ! { + self.sess.span_diagnostic.span_bug(sp, m) + } + + crate fn cancel(&self, err: &mut DiagnosticBuilder<'_>) { + self.sess.span_diagnostic.cancel(err) + } + + crate fn diagnostic(&self) -> &'a errors::Handler { + &self.sess.span_diagnostic + } + + crate fn span_to_snippet(&self, span: Span) -> Result<String, SpanSnippetError> { + self.sess.source_map().span_to_snippet(span) + } + + crate fn expected_ident_found(&self) -> DiagnosticBuilder<'a> { + let mut err = self.struct_span_err( + self.token.span, + &format!("expected identifier, found {}", self.this_token_descr()), + ); + if let token::Ident(name, false) = self.token.kind { + if Ident::new(name, self.token.span).is_raw_guess() { + err.span_suggestion( + self.token.span, + "you can escape reserved keywords to use them as identifiers", + format!("r#{}", name), + Applicability::MaybeIncorrect, + ); + } + } + if let Some(token_descr) = self.token_descr() { + err.span_label(self.token.span, format!("expected identifier, found {}", token_descr)); + } else { + err.span_label(self.token.span, "expected identifier"); + if self.token == token::Comma && self.look_ahead(1, |t| t.is_ident()) { + err.span_suggestion( + self.token.span, + "remove this comma", + String::new(), + Applicability::MachineApplicable, + ); + } + } + err + } + + pub fn expected_one_of_not_found( + &mut self, + edible: &[TokenKind], + inedible: &[TokenKind], + ) -> PResult<'a, bool /* recovered */> { + fn tokens_to_string(tokens: &[TokenType]) -> String { + let mut i = tokens.iter(); + // This might be a sign we need a connect method on Iterator. + let b = i.next() + .map_or(String::new(), |t| t.to_string()); + i.enumerate().fold(b, |mut b, (i, a)| { + if tokens.len() > 2 && i == tokens.len() - 2 { + b.push_str(", or "); + } else if tokens.len() == 2 && i == tokens.len() - 2 { + b.push_str(" or "); + } else { + b.push_str(", "); + } + b.push_str(&a.to_string()); + b + }) + } + + let mut expected = edible.iter() + .map(|x| TokenType::Token(x.clone())) + .chain(inedible.iter().map(|x| TokenType::Token(x.clone()))) + .chain(self.expected_tokens.iter().cloned()) + .collect::<Vec<_>>(); + expected.sort_by_cached_key(|x| x.to_string()); + expected.dedup(); + let expect = tokens_to_string(&expected[..]); + let actual = self.this_token_to_string(); + let (msg_exp, (label_sp, label_exp)) = if expected.len() > 1 { + let short_expect = if expected.len() > 6 { + format!("{} possible tokens", expected.len()) + } else { + expect.clone() + }; + (format!("expected one of {}, found `{}`", expect, actual), + (self.sess.source_map().next_point(self.prev_span), + format!("expected one of {} here", short_expect))) + } else if expected.is_empty() { + (format!("unexpected token: `{}`", actual), + (self.prev_span, "unexpected token after this".to_string())) + } else { + (format!("expected {}, found `{}`", expect, actual), + (self.sess.source_map().next_point(self.prev_span), + format!("expected {} here", expect))) + }; + self.last_unexpected_token_span = Some(self.token.span); + let mut err = self.fatal(&msg_exp); + if self.token.is_ident_named(sym::and) { + err.span_suggestion_short( + self.token.span, + "use `&&` instead of `and` for the boolean operator", + "&&".to_string(), + Applicability::MaybeIncorrect, + ); + } + if self.token.is_ident_named(sym::or) { + err.span_suggestion_short( + self.token.span, + "use `||` instead of `or` for the boolean operator", + "||".to_string(), + Applicability::MaybeIncorrect, + ); + } + let sp = if self.token == token::Eof { + // This is EOF, don't want to point at the following char, but rather the last token + self.prev_span + } else { + label_sp + }; + match self.recover_closing_delimiter(&expected.iter().filter_map(|tt| match tt { + TokenType::Token(t) => Some(t.clone()), + _ => None, + }).collect::<Vec<_>>(), err) { + Err(e) => err = e, + Ok(recovered) => { + return Ok(recovered); + } + } + + let is_semi_suggestable = expected.iter().any(|t| match t { + TokenType::Token(token::Semi) => true, // we expect a `;` here + _ => false, + }) && ( // a `;` would be expected before the current keyword + self.token.is_keyword(kw::Break) || + self.token.is_keyword(kw::Continue) || + self.token.is_keyword(kw::For) || + self.token.is_keyword(kw::If) || + self.token.is_keyword(kw::Let) || + self.token.is_keyword(kw::Loop) || + self.token.is_keyword(kw::Match) || + self.token.is_keyword(kw::Return) || + self.token.is_keyword(kw::While) + ); + let sm = self.sess.source_map(); + match (sm.lookup_line(self.token.span.lo()), sm.lookup_line(sp.lo())) { + (Ok(ref a), Ok(ref b)) if a.line != b.line && is_semi_suggestable => { + // The spans are in different lines, expected `;` and found `let` or `return`. + // High likelihood that it is only a missing `;`. + err.span_suggestion_short( + label_sp, + "a semicolon may be missing here", + ";".to_string(), + Applicability::MaybeIncorrect, + ); + err.emit(); + return Ok(true); + } + (Ok(ref a), Ok(ref b)) if a.line == b.line => { + // When the spans are in the same line, it means that the only content between + // them is whitespace, point at the found token in that case: + // + // X | () => { syntax error }; + // | ^^^^^ expected one of 8 possible tokens here + // + // instead of having: + // + // X | () => { syntax error }; + // | -^^^^^ unexpected token + // | | + // | expected one of 8 possible tokens here + err.span_label(self.token.span, label_exp); + } + _ if self.prev_span == syntax_pos::DUMMY_SP => { + // Account for macro context where the previous span might not be + // available to avoid incorrect output (#54841). + err.span_label(self.token.span, "unexpected token"); + } + _ => { + err.span_label(sp, label_exp); + err.span_label(self.token.span, "unexpected token"); + } + } + self.maybe_annotate_with_ascription(&mut err, false); + Err(err) + } + + pub fn maybe_annotate_with_ascription( + &self, + err: &mut DiagnosticBuilder<'_>, + maybe_expected_semicolon: bool, + ) { + if let Some((sp, likely_path)) = self.last_type_ascription { + let sm = self.sess.source_map(); + let next_pos = sm.lookup_char_pos(self.token.span.lo()); + let op_pos = sm.lookup_char_pos(sp.hi()); + + if likely_path { + err.span_suggestion( + sp, + "maybe write a path separator here", + "::".to_string(), + match self.sess.unstable_features { + UnstableFeatures::Disallow => Applicability::MachineApplicable, + _ => Applicability::MaybeIncorrect, + }, + ); + } else if op_pos.line != next_pos.line && maybe_expected_semicolon { + err.span_suggestion( + sp, + "try using a semicolon", + ";".to_string(), + Applicability::MaybeIncorrect, + ); + } else if let UnstableFeatures::Disallow = self.sess.unstable_features { + err.span_label(sp, "tried to parse a type due to this"); + } else { + err.span_label(sp, "tried to parse a type due to this type ascription"); + } + if let UnstableFeatures::Disallow = self.sess.unstable_features { + // Give extra information about type ascription only if it's a nightly compiler. + } else { + err.note("`#![feature(type_ascription)]` lets you annotate an expression with a \ + type: `<expr>: <type>`"); + err.note("for more information, see \ + https://github.com/rust-lang/rust/issues/23416"); + } + } + } + + /// Eats and discards tokens until one of `kets` is encountered. Respects token trees, + /// passes through any errors encountered. Used for error recovery. + crate fn eat_to_tokens(&mut self, kets: &[&TokenKind]) { + let handler = self.diagnostic(); + + if let Err(ref mut err) = self.parse_seq_to_before_tokens( + kets, + SeqSep::none(), + TokenExpectType::Expect, + |p| Ok(p.parse_token_tree()), + ) { + handler.cancel(err); + } + } + + /// This function checks if there are trailing angle brackets and produces + /// a diagnostic to suggest removing them. + /// + /// ```ignore (diagnostic) + /// let _ = vec![1, 2, 3].into_iter().collect::<Vec<usize>>>>(); + /// ^^ help: remove extra angle brackets + /// ``` + crate fn check_trailing_angle_brackets(&mut self, segment: &PathSegment, end: TokenKind) { + // This function is intended to be invoked after parsing a path segment where there are two + // cases: + // + // 1. A specific token is expected after the path segment. + // eg. `x.foo(`, `x.foo::<u32>(` (parenthesis - method call), + // `Foo::`, or `Foo::<Bar>::` (mod sep - continued path). + // 2. No specific token is expected after the path segment. + // eg. `x.foo` (field access) + // + // This function is called after parsing `.foo` and before parsing the token `end` (if + // present). This includes any angle bracket arguments, such as `.foo::<u32>` or + // `Foo::<Bar>`. + + // We only care about trailing angle brackets if we previously parsed angle bracket + // arguments. This helps stop us incorrectly suggesting that extra angle brackets be + // removed in this case: + // + // `x.foo >> (3)` (where `x.foo` is a `u32` for example) + // + // This case is particularly tricky as we won't notice it just looking at the tokens - + // it will appear the same (in terms of upcoming tokens) as below (since the `::<u32>` will + // have already been parsed): + // + // `x.foo::<u32>>>(3)` + let parsed_angle_bracket_args = segment.args + .as_ref() + .map(|args| args.is_angle_bracketed()) + .unwrap_or(false); + + debug!( + "check_trailing_angle_brackets: parsed_angle_bracket_args={:?}", + parsed_angle_bracket_args, + ); + if !parsed_angle_bracket_args { + return; + } + + // Keep the span at the start so we can highlight the sequence of `>` characters to be + // removed. + let lo = self.token.span; + + // We need to look-ahead to see if we have `>` characters without moving the cursor forward + // (since we might have the field access case and the characters we're eating are + // actual operators and not trailing characters - ie `x.foo >> 3`). + let mut position = 0; + + // We can encounter `>` or `>>` tokens in any order, so we need to keep track of how + // many of each (so we can correctly pluralize our error messages) and continue to + // advance. + let mut number_of_shr = 0; + let mut number_of_gt = 0; + while self.look_ahead(position, |t| { + trace!("check_trailing_angle_brackets: t={:?}", t); + if *t == token::BinOp(token::BinOpToken::Shr) { + number_of_shr += 1; + true + } else if *t == token::Gt { + number_of_gt += 1; + true + } else { + false + } + }) { + position += 1; + } + + // If we didn't find any trailing `>` characters, then we have nothing to error about. + debug!( + "check_trailing_angle_brackets: number_of_gt={:?} number_of_shr={:?}", + number_of_gt, number_of_shr, + ); + if number_of_gt < 1 && number_of_shr < 1 { + return; + } + + // Finally, double check that we have our end token as otherwise this is the + // second case. + if self.look_ahead(position, |t| { + trace!("check_trailing_angle_brackets: t={:?}", t); + *t == end + }) { + // Eat from where we started until the end token so that parsing can continue + // as if we didn't have those extra angle brackets. + self.eat_to_tokens(&[&end]); + let span = lo.until(self.token.span); + + let plural = number_of_gt > 1 || number_of_shr >= 1; + self.diagnostic() + .struct_span_err( + span, + &format!("unmatched angle bracket{}", if plural { "s" } else { "" }), + ) + .span_suggestion( + span, + &format!("remove extra angle bracket{}", if plural { "s" } else { "" }), + String::new(), + Applicability::MachineApplicable, + ) + .emit(); + } + } + + /// Produce an error if comparison operators are chained (RFC #558). + /// We only need to check lhs, not rhs, because all comparison ops + /// have same precedence and are left-associative + crate fn check_no_chained_comparison(&self, lhs: &Expr, outer_op: &AssocOp) { + debug_assert!(outer_op.is_comparison(), + "check_no_chained_comparison: {:?} is not comparison", + outer_op); + match lhs.node { + ExprKind::Binary(op, _, _) if op.node.is_comparison() => { + // respan to include both operators + let op_span = op.span.to(self.token.span); + let mut err = self.struct_span_err( + op_span, + "chained comparison operators require parentheses", + ); + if op.node == BinOpKind::Lt && + *outer_op == AssocOp::Less || // Include `<` to provide this recommendation + *outer_op == AssocOp::Greater // even in a case like the following: + { // Foo<Bar<Baz<Qux, ()>>> + err.help( + "use `::<...>` instead of `<...>` if you meant to specify type arguments"); + err.help("or use `(...)` if you meant to specify fn arguments"); + } + err.emit(); + } + _ => {} + } + } + + crate fn maybe_report_ambiguous_plus( + &mut self, + allow_plus: bool, + impl_dyn_multi: bool, + ty: &Ty, + ) { + if !allow_plus && impl_dyn_multi { + let sum_with_parens = format!("({})", pprust::ty_to_string(&ty)); + self.struct_span_err(ty.span, "ambiguous `+` in a type") + .span_suggestion( + ty.span, + "use parentheses to disambiguate", + sum_with_parens, + Applicability::MachineApplicable, + ) + .emit(); + } + } + + crate fn maybe_report_invalid_custom_discriminants( + sess: &ParseSess, + variants: &[Spanned<ast::Variant_>], + ) { + let has_fields = variants.iter().any(|variant| match variant.node.data { + VariantData::Tuple(..) | VariantData::Struct(..) => true, + VariantData::Unit(..) => false, + }); + + let discriminant_spans = variants.iter().filter(|variant| match variant.node.data { + VariantData::Tuple(..) | VariantData::Struct(..) => false, + VariantData::Unit(..) => true, + }) + .filter_map(|variant| variant.node.disr_expr.as_ref().map(|c| c.value.span)) + .collect::<Vec<_>>(); + + if !discriminant_spans.is_empty() && has_fields { + let mut err = feature_err( + sess, + sym::arbitrary_enum_discriminant, + discriminant_spans.clone(), + crate::feature_gate::GateIssue::Language, + "custom discriminant values are not allowed in enums with tuple or struct variants", + ); + for sp in discriminant_spans { + err.span_label(sp, "disallowed custom discriminant"); + } + for variant in variants.iter() { + match &variant.node.data { + VariantData::Struct(..) => { + err.span_label( + variant.span, + "struct variant defined here", + ); + } + VariantData::Tuple(..) => { + err.span_label( + variant.span, + "tuple variant defined here", + ); + } + VariantData::Unit(..) => {} + } + } + err.emit(); + } + } + + crate fn maybe_recover_from_bad_type_plus( + &mut self, + allow_plus: bool, + ty: &Ty, + ) -> PResult<'a, ()> { + // Do not add `+` to expected tokens. + if !allow_plus || !self.token.is_like_plus() { + return Ok(()); + } + + self.bump(); // `+` + let bounds = self.parse_generic_bounds(None)?; + let sum_span = ty.span.to(self.prev_span); + + let mut err = struct_span_err!( + self.sess.span_diagnostic, + sum_span, + E0178, + "expected a path on the left-hand side of `+`, not `{}`", + pprust::ty_to_string(ty) + ); + + match ty.node { + TyKind::Rptr(ref lifetime, ref mut_ty) => { + let sum_with_parens = pprust::to_string(|s| { + s.s.word("&"); + s.print_opt_lifetime(lifetime); + s.print_mutability(mut_ty.mutbl); + s.popen(); + s.print_type(&mut_ty.ty); + s.print_type_bounds(" +", &bounds); + s.pclose() + }); + err.span_suggestion( + sum_span, + "try adding parentheses", + sum_with_parens, + Applicability::MachineApplicable, + ); + } + TyKind::Ptr(..) | TyKind::BareFn(..) => { + err.span_label(sum_span, "perhaps you forgot parentheses?"); + } + _ => { + err.span_label(sum_span, "expected a path"); + } + } + err.emit(); + Ok(()) + } + + /// Try to recover from associated item paths like `[T]::AssocItem`/`(T, U)::AssocItem`. + /// Attempt to convert the base expression/pattern/type into a type, parse the `::AssocItem` + /// tail, and combine them into a `<Ty>::AssocItem` expression/pattern/type. + crate fn maybe_recover_from_bad_qpath<T: RecoverQPath>( + &mut self, + base: P<T>, + allow_recovery: bool, + ) -> PResult<'a, P<T>> { + // Do not add `::` to expected tokens. + if allow_recovery && self.token == token::ModSep { + if let Some(ty) = base.to_ty() { + return self.maybe_recover_from_bad_qpath_stage_2(ty.span, ty); + } + } + Ok(base) + } + + /// Given an already parsed `Ty` parse the `::AssocItem` tail and + /// combine them into a `<Ty>::AssocItem` expression/pattern/type. + crate fn maybe_recover_from_bad_qpath_stage_2<T: RecoverQPath>( + &mut self, + ty_span: Span, + ty: P<Ty>, + ) -> PResult<'a, P<T>> { + self.expect(&token::ModSep)?; + + let mut path = ast::Path { + segments: Vec::new(), + span: DUMMY_SP, + }; + self.parse_path_segments(&mut path.segments, T::PATH_STYLE)?; + path.span = ty_span.to(self.prev_span); + + let ty_str = self + .span_to_snippet(ty_span) + .unwrap_or_else(|_| pprust::ty_to_string(&ty)); + self.diagnostic() + .struct_span_err(path.span, "missing angle brackets in associated item path") + .span_suggestion( + // this is a best-effort recovery + path.span, + "try", + format!("<{}>::{}", ty_str, path), + Applicability::MaybeIncorrect, + ) + .emit(); + + let path_span = ty_span.shrink_to_hi(); // use an empty path since `position` == 0 + Ok(P(T::recovered( + Some(QSelf { + ty, + path_span, + position: 0, + }), + path, + ))) + } + + crate fn maybe_consume_incorrect_semicolon(&mut self, items: &[P<Item>]) -> bool { + if self.eat(&token::Semi) { + let mut err = self.struct_span_err(self.prev_span, "expected item, found `;`"); + err.span_suggestion_short( + self.prev_span, + "remove this semicolon", + String::new(), + Applicability::MachineApplicable, + ); + if !items.is_empty() { + let previous_item = &items[items.len() - 1]; + let previous_item_kind_name = match previous_item.node { + // say "braced struct" because tuple-structs and + // braceless-empty-struct declarations do take a semicolon + ItemKind::Struct(..) => Some("braced struct"), + ItemKind::Enum(..) => Some("enum"), + ItemKind::Trait(..) => Some("trait"), + ItemKind::Union(..) => Some("union"), + _ => None, + }; + if let Some(name) = previous_item_kind_name { + err.help(&format!( + "{} declarations are not followed by a semicolon", + name + )); + } + } + err.emit(); + true + } else { + false + } + } + + /// Create a `DiagnosticBuilder` for an unexpected token `t` and try to recover if it is a + /// closing delimiter. + pub fn unexpected_try_recover( + &mut self, + t: &TokenKind, + ) -> PResult<'a, bool /* recovered */> { + let token_str = pprust::token_kind_to_string(t); + let this_token_str = self.this_token_descr(); + let (prev_sp, sp) = match (&self.token.kind, self.subparser_name) { + // Point at the end of the macro call when reaching end of macro arguments. + (token::Eof, Some(_)) => { + let sp = self.sess.source_map().next_point(self.token.span); + (sp, sp) + } + // We don't want to point at the following span after DUMMY_SP. + // This happens when the parser finds an empty TokenStream. + _ if self.prev_span == DUMMY_SP => (self.token.span, self.token.span), + // EOF, don't want to point at the following char, but rather the last token. + (token::Eof, None) => (self.prev_span, self.token.span), + _ => (self.sess.source_map().next_point(self.prev_span), self.token.span), + }; + let msg = format!( + "expected `{}`, found {}", + token_str, + match (&self.token.kind, self.subparser_name) { + (token::Eof, Some(origin)) => format!("end of {}", origin), + _ => this_token_str, + }, + ); + let mut err = self.struct_span_err(sp, &msg); + let label_exp = format!("expected `{}`", token_str); + match self.recover_closing_delimiter(&[t.clone()], err) { + Err(e) => err = e, + Ok(recovered) => { + return Ok(recovered); + } + } + let sm = self.sess.source_map(); + match (sm.lookup_line(prev_sp.lo()), sm.lookup_line(sp.lo())) { + (Ok(ref a), Ok(ref b)) if a.line == b.line => { + // When the spans are in the same line, it means that the only content + // between them is whitespace, point only at the found token. + err.span_label(sp, label_exp); + } + _ => { + err.span_label(prev_sp, label_exp); + err.span_label(sp, "unexpected token"); + } + } + Err(err) + } + + crate fn parse_semi_or_incorrect_foreign_fn_body( + &mut self, + ident: &Ident, + extern_sp: Span, + ) -> PResult<'a, ()> { + if self.token != token::Semi { + // this might be an incorrect fn definition (#62109) + let parser_snapshot = self.clone(); + match self.parse_inner_attrs_and_block() { + Ok((_, body)) => { + self.struct_span_err(ident.span, "incorrect `fn` inside `extern` block") + .span_label(ident.span, "can't have a body") + .span_label(body.span, "this body is invalid here") + .span_label( + extern_sp, + "`extern` blocks define existing foreign functions and `fn`s \ + inside of them cannot have a body") + .help("you might have meant to write a function accessible through ffi, \ + which can be done by writing `extern fn` outside of the \ + `extern` block") + .note("for more information, visit \ + https://doc.rust-lang.org/std/keyword.extern.html") + .emit(); + } + Err(mut err) => { + err.cancel(); + mem::replace(self, parser_snapshot); + self.expect(&token::Semi)?; + } + } + } else { + self.bump(); + } + Ok(()) + } + + /// Consume alternative await syntaxes like `await!(<expr>)`, `await <expr>`, + /// `await? <expr>`, `await(<expr>)`, and `await { <expr> }`. + crate fn parse_incorrect_await_syntax( + &mut self, + lo: Span, + await_sp: Span, + ) -> PResult<'a, (Span, ExprKind)> { + if self.token == token::Not { + // Handle `await!(<expr>)`. + self.expect(&token::Not)?; + self.expect(&token::OpenDelim(token::Paren))?; + let expr = self.parse_expr()?; + self.expect(&token::CloseDelim(token::Paren))?; + let sp = self.error_on_incorrect_await(lo, self.prev_span, &expr, false); + return Ok((sp, ExprKind::Await(expr))) + } + + let is_question = self.eat(&token::Question); // Handle `await? <expr>`. + let expr = if self.token == token::OpenDelim(token::Brace) { + // Handle `await { <expr> }`. + // This needs to be handled separatedly from the next arm to avoid + // interpreting `await { <expr> }?` as `<expr>?.await`. + self.parse_block_expr( + None, + self.token.span, + BlockCheckMode::Default, + ThinVec::new(), + ) + } else { + self.parse_expr() + }.map_err(|mut err| { + err.span_label(await_sp, "while parsing this incorrect await expression"); + err + })?; + let sp = self.error_on_incorrect_await(lo, expr.span, &expr, is_question); + Ok((sp, ExprKind::Await(expr))) + } + + fn error_on_incorrect_await(&self, lo: Span, hi: Span, expr: &Expr, is_question: bool) -> Span { + let expr_str = self.span_to_snippet(expr.span) + .unwrap_or_else(|_| pprust::expr_to_string(&expr)); + let suggestion = format!("{}.await{}", expr_str, if is_question { "?" } else { "" }); + let sp = lo.to(hi); + let app = match expr.node { + ExprKind::Try(_) => Applicability::MaybeIncorrect, // `await <expr>?` + _ => Applicability::MachineApplicable, + }; + self.struct_span_err(sp, "incorrect use of `await`") + .span_suggestion(sp, "`await` is a postfix operation", suggestion, app) + .emit(); + sp + } + + /// If encountering `future.await()`, consume and emit error. + crate fn recover_from_await_method_call(&mut self) { + if self.token == token::OpenDelim(token::Paren) && + self.look_ahead(1, |t| t == &token::CloseDelim(token::Paren)) + { + // future.await() + let lo = self.token.span; + self.bump(); // ( + let sp = lo.to(self.token.span); + self.bump(); // ) + self.struct_span_err(sp, "incorrect use of `await`") + .span_suggestion( + sp, + "`await` is not a method call, remove the parentheses", + String::new(), + Applicability::MachineApplicable, + ).emit() + } + } + + /// Recover a situation like `for ( $pat in $expr )` + /// and suggest writing `for $pat in $expr` instead. + /// + /// This should be called before parsing the `$block`. + crate fn recover_parens_around_for_head( + &mut self, + pat: P<Pat>, + expr: &Expr, + begin_paren: Option<Span>, + ) -> P<Pat> { + match (&self.token.kind, begin_paren) { + (token::CloseDelim(token::Paren), Some(begin_par_sp)) => { + self.bump(); + + let pat_str = self + // Remove the `(` from the span of the pattern: + .span_to_snippet(pat.span.trim_start(begin_par_sp).unwrap()) + .unwrap_or_else(|_| pprust::pat_to_string(&pat)); + + self.struct_span_err(self.prev_span, "unexpected closing `)`") + .span_label(begin_par_sp, "opening `(`") + .span_suggestion( + begin_par_sp.to(self.prev_span), + "remove parenthesis in `for` loop", + format!("{} in {}", pat_str, pprust::expr_to_string(&expr)), + // With e.g. `for (x) in y)` this would replace `(x) in y)` + // with `x) in y)` which is syntactically invalid. + // However, this is prevented before we get here. + Applicability::MachineApplicable, + ) + .emit(); + + // Unwrap `(pat)` into `pat` to avoid the `unused_parens` lint. + pat.and_then(|pat| match pat.node { + PatKind::Paren(pat) => pat, + _ => P(pat), + }) + } + _ => pat, + } + } + + crate fn could_ascription_be_path(&self, node: &ast::ExprKind) -> bool { + self.token.is_ident() && + if let ast::ExprKind::Path(..) = node { true } else { false } && + !self.token.is_reserved_ident() && // v `foo:bar(baz)` + self.look_ahead(1, |t| t == &token::OpenDelim(token::Paren)) || + self.look_ahead(1, |t| t == &token::Lt) && // `foo:bar<baz` + self.look_ahead(2, |t| t.is_ident()) || + self.look_ahead(1, |t| t == &token::Colon) && // `foo:bar:baz` + self.look_ahead(2, |t| t.is_ident()) || + self.look_ahead(1, |t| t == &token::ModSep) && + (self.look_ahead(2, |t| t.is_ident()) || // `foo:bar::baz` + self.look_ahead(2, |t| t == &token::Lt)) // `foo:bar::<baz>` + } + + crate fn recover_seq_parse_error( + &mut self, + delim: token::DelimToken, + lo: Span, + result: PResult<'a, P<Expr>>, + ) -> P<Expr> { + match result { + Ok(x) => x, + Err(mut err) => { + err.emit(); + // recover from parse error + self.consume_block(delim); + self.mk_expr(lo.to(self.prev_span), ExprKind::Err, ThinVec::new()) + } + } + } + + crate fn recover_closing_delimiter( + &mut self, + tokens: &[TokenKind], + mut err: DiagnosticBuilder<'a>, + ) -> PResult<'a, bool> { + let mut pos = None; + // we want to use the last closing delim that would apply + for (i, unmatched) in self.unclosed_delims.iter().enumerate().rev() { + if tokens.contains(&token::CloseDelim(unmatched.expected_delim)) + && Some(self.token.span) > unmatched.unclosed_span + { + pos = Some(i); + } + } + match pos { + Some(pos) => { + // Recover and assume that the detected unclosed delimiter was meant for + // this location. Emit the diagnostic and act as if the delimiter was + // present for the parser's sake. + + // Don't attempt to recover from this unclosed delimiter more than once. + let unmatched = self.unclosed_delims.remove(pos); + let delim = TokenType::Token(token::CloseDelim(unmatched.expected_delim)); + + // We want to suggest the inclusion of the closing delimiter where it makes + // the most sense, which is immediately after the last token: + // + // {foo(bar {}} + // - ^ + // | | + // | help: `)` may belong here + // | + // unclosed delimiter + if let Some(sp) = unmatched.unclosed_span { + err.span_label(sp, "unclosed delimiter"); + } + err.span_suggestion_short( + self.sess.source_map().next_point(self.prev_span), + &format!("{} may belong here", delim.to_string()), + delim.to_string(), + Applicability::MaybeIncorrect, + ); + err.emit(); + self.expected_tokens.clear(); // reduce errors + Ok(true) + } + _ => Err(err), + } + } + + /// Recover from `pub` keyword in places where it seems _reasonable_ but isn't valid. + crate fn eat_bad_pub(&mut self) { + if self.token.is_keyword(kw::Pub) { + match self.parse_visibility(false) { + Ok(vis) => { + self.diagnostic() + .struct_span_err(vis.span, "unnecessary visibility qualifier") + .span_label(vis.span, "`pub` not permitted here") + .emit(); + } + Err(mut err) => err.emit(), + } + } + } + + // Eat tokens until we can be relatively sure we reached the end of the + // statement. This is something of a best-effort heuristic. + // + // We terminate when we find an unmatched `}` (without consuming it). + crate fn recover_stmt(&mut self) { + self.recover_stmt_(SemiColonMode::Ignore, BlockMode::Ignore) + } + + // If `break_on_semi` is `Break`, then we will stop consuming tokens after + // finding (and consuming) a `;` outside of `{}` or `[]` (note that this is + // approximate - it can mean we break too early due to macros, but that + // should only lead to sub-optimal recovery, not inaccurate parsing). + // + // If `break_on_block` is `Break`, then we will stop consuming tokens + // after finding (and consuming) a brace-delimited block. + crate fn recover_stmt_(&mut self, break_on_semi: SemiColonMode, break_on_block: BlockMode) { + let mut brace_depth = 0; + let mut bracket_depth = 0; + let mut in_block = false; + debug!("recover_stmt_ enter loop (semi={:?}, block={:?})", + break_on_semi, break_on_block); + loop { + debug!("recover_stmt_ loop {:?}", self.token); + match self.token.kind { + token::OpenDelim(token::DelimToken::Brace) => { + brace_depth += 1; + self.bump(); + if break_on_block == BlockMode::Break && + brace_depth == 1 && + bracket_depth == 0 { + in_block = true; + } + } + token::OpenDelim(token::DelimToken::Bracket) => { + bracket_depth += 1; + self.bump(); + } + token::CloseDelim(token::DelimToken::Brace) => { + if brace_depth == 0 { + debug!("recover_stmt_ return - close delim {:?}", self.token); + break; + } + brace_depth -= 1; + self.bump(); + if in_block && bracket_depth == 0 && brace_depth == 0 { + debug!("recover_stmt_ return - block end {:?}", self.token); + break; + } + } + token::CloseDelim(token::DelimToken::Bracket) => { + bracket_depth -= 1; + if bracket_depth < 0 { + bracket_depth = 0; + } + self.bump(); + } + token::Eof => { + debug!("recover_stmt_ return - Eof"); + break; + } + token::Semi => { + self.bump(); + if break_on_semi == SemiColonMode::Break && + brace_depth == 0 && + bracket_depth == 0 { + debug!("recover_stmt_ return - Semi"); + break; + } + } + token::Comma if break_on_semi == SemiColonMode::Comma && + brace_depth == 0 && + bracket_depth == 0 => + { + debug!("recover_stmt_ return - Semi"); + break; + } + _ => { + self.bump() + } + } + } + } + + crate fn check_for_for_in_in_typo(&mut self, in_span: Span) { + if self.eat_keyword(kw::In) { + // a common typo: `for _ in in bar {}` + self.struct_span_err(self.prev_span, "expected iterable, found keyword `in`") + .span_suggestion_short( + in_span.until(self.prev_span), + "remove the duplicated `in`", + String::new(), + Applicability::MachineApplicable, + ) + .emit(); + } + } + + crate fn expected_semi_or_open_brace(&mut self) -> PResult<'a, ast::TraitItem> { + let token_str = self.this_token_descr(); + let mut err = self.fatal(&format!("expected `;` or `{{`, found {}", token_str)); + err.span_label(self.token.span, "expected `;` or `{`"); + Err(err) + } + + crate fn eat_incorrect_doc_comment_for_arg_type(&mut self) { + if let token::DocComment(_) = self.token.kind { + self.struct_span_err( + self.token.span, + "documentation comments cannot be applied to a function parameter's type", + ) + .span_label(self.token.span, "doc comments are not allowed here") + .emit(); + self.bump(); + } else if self.token == token::Pound && self.look_ahead(1, |t| { + *t == token::OpenDelim(token::Bracket) + }) { + let lo = self.token.span; + // Skip every token until next possible arg. + while self.token != token::CloseDelim(token::Bracket) { + self.bump(); + } + let sp = lo.to(self.token.span); + self.bump(); + self.struct_span_err( + sp, + "attributes cannot be applied to a function parameter's type", + ) + .span_label(sp, "attributes are not allowed here") + .emit(); + } + } + + crate fn argument_without_type( + &mut self, + err: &mut DiagnosticBuilder<'_>, + pat: P<ast::Pat>, + require_name: bool, + is_trait_item: bool, + ) -> Option<Ident> { + // If we find a pattern followed by an identifier, it could be an (incorrect) + // C-style parameter declaration. + if self.check_ident() && self.look_ahead(1, |t| { + *t == token::Comma || *t == token::CloseDelim(token::Paren) + }) { // `fn foo(String s) {}` + let ident = self.parse_ident().unwrap(); + let span = pat.span.with_hi(ident.span.hi()); + + err.span_suggestion( + span, + "declare the type after the parameter binding", + String::from("<identifier>: <type>"), + Applicability::HasPlaceholders, + ); + return Some(ident); + } else if let PatKind::Ident(_, ident, _) = pat.node { + if require_name && ( + is_trait_item || + self.token == token::Comma || + self.token == token::CloseDelim(token::Paren) + ) { // `fn foo(a, b) {}` or `fn foo(usize, usize) {}` + err.span_suggestion( + pat.span, + "if this was a parameter name, give it a type", + format!("{}: TypeName", ident), + Applicability::HasPlaceholders, + ); + err.span_suggestion( + pat.span, + "if this is a type, explicitly ignore the parameter name", + format!("_: {}", ident), + Applicability::MachineApplicable, + ); + err.note("anonymous parameters are removed in the 2018 edition (see RFC 1685)"); + return Some(ident); + } + } + None + } + + crate fn recover_arg_parse(&mut self) -> PResult<'a, (P<ast::Pat>, P<ast::Ty>)> { + let pat = self.parse_pat(Some("argument name"))?; + self.expect(&token::Colon)?; + let ty = self.parse_ty()?; + + self.diagnostic() + .struct_span_err_with_code( + pat.span, + "patterns aren't allowed in methods without bodies", + DiagnosticId::Error("E0642".into()), + ) + .span_suggestion_short( + pat.span, + "give this argument a name or use an underscore to ignore it", + "_".to_owned(), + Applicability::MachineApplicable, + ) + .emit(); + + // Pretend the pattern is `_`, to avoid duplicate errors from AST validation. + let pat = P(Pat { + node: PatKind::Wild, + span: pat.span, + id: ast::DUMMY_NODE_ID + }); + Ok((pat, ty)) + } + + crate fn recover_bad_self_arg( + &mut self, + mut arg: ast::Arg, + is_trait_item: bool, + ) -> PResult<'a, ast::Arg> { + let sp = arg.pat.span; + arg.ty.node = TyKind::Err; + let mut err = self.struct_span_err(sp, "unexpected `self` parameter in function"); + if is_trait_item { + err.span_label(sp, "must be the first associated function parameter"); + } else { + err.span_label(sp, "not valid as function parameter"); + err.note("`self` is only valid as the first parameter of an associated function"); + } + err.emit(); + Ok(arg) + } + + crate fn consume_block(&mut self, delim: token::DelimToken) { + let mut brace_depth = 0; + loop { + if self.eat(&token::OpenDelim(delim)) { + brace_depth += 1; + } else if self.eat(&token::CloseDelim(delim)) { + if brace_depth == 0 { + return; + } else { + brace_depth -= 1; + continue; + } + } else if self.token == token::Eof || self.eat(&token::CloseDelim(token::NoDelim)) { + return; + } else { + self.bump(); + } + } + } + + crate fn expected_expression_found(&self) -> DiagnosticBuilder<'a> { + let (span, msg) = match (&self.token.kind, self.subparser_name) { + (&token::Eof, Some(origin)) => { + let sp = self.sess.source_map().next_point(self.token.span); + (sp, format!("expected expression, found end of {}", origin)) + } + _ => (self.token.span, format!( + "expected expression, found {}", + self.this_token_descr(), + )), + }; + let mut err = self.struct_span_err(span, &msg); + let sp = self.sess.source_map().start_point(self.token.span); + if let Some(sp) = self.sess.ambiguous_block_expr_parse.borrow().get(&sp) { + self.sess.expr_parentheses_needed(&mut err, *sp, None); + } + err.span_label(span, "expected expression"); + err + } + + /// Replace duplicated recovered arguments with `_` pattern to avoid unecessary errors. + /// + /// This is necessary because at this point we don't know whether we parsed a function with + /// anonymous arguments or a function with names but no types. In order to minimize + /// unecessary errors, we assume the arguments are in the shape of `fn foo(a, b, c)` where + /// the arguments are *names* (so we don't emit errors about not being able to find `b` in + /// the local scope), but if we find the same name multiple times, like in `fn foo(i8, i8)`, + /// we deduplicate them to not complain about duplicated argument names. + crate fn deduplicate_recovered_arg_names(&self, fn_inputs: &mut Vec<Arg>) { + let mut seen_inputs = FxHashSet::default(); + for input in fn_inputs.iter_mut() { + let opt_ident = if let (PatKind::Ident(_, ident, _), TyKind::Err) = ( + &input.pat.node, &input.ty.node, + ) { + Some(*ident) + } else { + None + }; + if let Some(ident) = opt_ident { + if seen_inputs.contains(&ident) { + input.pat.node = PatKind::Wild; + } + seen_inputs.insert(ident); + } + } + } +} diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs index 74fff3324ea..5121a9ef7b5 100644 --- a/src/libsyntax/parse/lexer/comments.rs +++ b/src/libsyntax/parse/lexer/comments.rs @@ -2,16 +2,16 @@ pub use CommentStyle::*; use crate::ast; use crate::source_map::SourceMap; -use crate::parse::lexer::{is_block_doc_comment, is_pattern_whitespace}; -use crate::parse::lexer::{self, ParseSess, StringReader, TokenAndSpan}; -use crate::print::pprust; +use crate::parse::lexer::is_block_doc_comment; +use crate::parse::lexer::ParseSess; use syntax_pos::{BytePos, CharPos, Pos, FileName}; -use log::debug; -use std::io::Read; use std::usize; +#[cfg(test)] +mod tests; + #[derive(Clone, Copy, PartialEq, Debug)] pub enum CommentStyle { /// No code on either side of each line of the comment @@ -137,66 +137,6 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String { panic!("not a doc-comment: {}", comment); } -fn push_blank_line_comment(rdr: &StringReader<'_>, comments: &mut Vec<Comment>) { - debug!(">>> blank-line comment"); - comments.push(Comment { - style: BlankLine, - lines: Vec::new(), - pos: rdr.pos, - }); -} - -fn consume_whitespace_counting_blank_lines( - rdr: &mut StringReader<'_>, - comments: &mut Vec<Comment> -) { - while is_pattern_whitespace(rdr.ch) && !rdr.is_eof() { - if rdr.ch_is('\n') { - push_blank_line_comment(rdr, &mut *comments); - } - rdr.bump(); - } -} - -fn read_shebang_comment(rdr: &mut StringReader<'_>, - code_to_the_left: bool, - comments: &mut Vec<Comment>) { - debug!(">>> shebang comment"); - let p = rdr.pos; - debug!("<<< shebang comment"); - comments.push(Comment { - style: if code_to_the_left { Trailing } else { Isolated }, - lines: vec![rdr.read_one_line_comment()], - pos: p, - }); -} - -fn read_line_comments(rdr: &mut StringReader<'_>, - code_to_the_left: bool, - comments: &mut Vec<Comment>) { - debug!(">>> line comments"); - let p = rdr.pos; - let mut lines: Vec<String> = Vec::new(); - while rdr.ch_is('/') && rdr.nextch_is('/') { - let line = rdr.read_one_line_comment(); - debug!("{}", line); - // Doc comments are not put in comments. - if is_doc_comment(&line[..]) { - break; - } - lines.push(line); - rdr.consume_non_eol_whitespace(); - } - debug!("<<< line comments"); - if !lines.is_empty() { - comments.push(Comment { - style: if code_to_the_left { Trailing } else { Isolated }, - lines, - pos: p, - }); - } -} - /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char. /// Otherwise returns `Some(k)` where `k` is first char offset after that leading /// whitespace. Note that `k` may be outside bounds of `s`. @@ -211,246 +151,104 @@ fn all_whitespace(s: &str, col: CharPos) -> Option<usize> { Some(idx) } -fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String>, s: String, col: CharPos) { +fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str { let len = s.len(); - let s1 = match all_whitespace(&s[..], col) { - Some(col) => { - if col < len { - s[col..len].to_string() - } else { - String::new() - } - } + match all_whitespace(&s, col) { + Some(col) => if col < len { &s[col..] } else { "" }, None => s, - }; - debug!("pushing line: {}", s1); - lines.push(s1); -} - -fn read_block_comment(rdr: &mut StringReader<'_>, - code_to_the_left: bool, - comments: &mut Vec<Comment>) { - debug!(">>> block comment"); - let p = rdr.pos; - let mut lines: Vec<String> = Vec::new(); - - // Count the number of chars since the start of the line by rescanning. - let src_index = rdr.src_index(rdr.source_file.line_begin_pos(rdr.pos)); - let end_src_index = rdr.src_index(rdr.pos); - assert!(src_index <= end_src_index, - "src_index={}, end_src_index={}, line_begin_pos={}", - src_index, end_src_index, rdr.source_file.line_begin_pos(rdr.pos).to_u32()); - - let col = CharPos(rdr.src[src_index..end_src_index].chars().count()); - - rdr.bump(); - rdr.bump(); - - let mut curr_line = String::from("/*"); - - // doc-comments are not really comments, they are attributes - if (rdr.ch_is('*') && !rdr.nextch_is('*')) || rdr.ch_is('!') { - while !(rdr.ch_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() { - curr_line.push(rdr.ch.unwrap()); - rdr.bump(); - } - if !rdr.is_eof() { - curr_line.push_str("*/"); - rdr.bump(); - rdr.bump(); - } - if is_block_doc_comment(&curr_line[..]) { - return; - } - assert!(!curr_line.contains('\n')); - lines.push(curr_line); - } else { - let mut level: isize = 1; - while level > 0 { - debug!("=== block comment level {}", level); - if rdr.is_eof() { - rdr.fatal("unterminated block comment").raise(); - } - if rdr.ch_is('\n') { - trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); - curr_line = String::new(); - rdr.bump(); - } else { - curr_line.push(rdr.ch.unwrap()); - if rdr.ch_is('/') && rdr.nextch_is('*') { - rdr.bump(); - rdr.bump(); - curr_line.push('*'); - level += 1; - } else { - if rdr.ch_is('*') && rdr.nextch_is('/') { - rdr.bump(); - rdr.bump(); - curr_line.push('/'); - level -= 1; - } else { - rdr.bump(); - } - } - } - } - if !curr_line.is_empty() { - trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); - } - } - - let mut style = if code_to_the_left { - Trailing - } else { - Isolated - }; - rdr.consume_non_eol_whitespace(); - if !rdr.is_eof() && !rdr.ch_is('\n') && lines.len() == 1 { - style = Mixed; } - debug!("<<< block comment"); - comments.push(Comment { - style, - lines, - pos: p, - }); } - -fn consume_comment(rdr: &mut StringReader<'_>, - comments: &mut Vec<Comment>, - code_to_the_left: &mut bool, - anything_to_the_left: &mut bool) { - debug!(">>> consume comment"); - if rdr.ch_is('/') && rdr.nextch_is('/') { - read_line_comments(rdr, *code_to_the_left, comments); - *code_to_the_left = false; - *anything_to_the_left = false; - } else if rdr.ch_is('/') && rdr.nextch_is('*') { - read_block_comment(rdr, *code_to_the_left, comments); - *anything_to_the_left = true; - } else if rdr.ch_is('#') && rdr.nextch_is('!') { - read_shebang_comment(rdr, *code_to_the_left, comments); - *code_to_the_left = false; - *anything_to_the_left = false; - } else { - panic!(); +fn split_block_comment_into_lines( + text: &str, + col: CharPos, +) -> Vec<String> { + let mut res: Vec<String> = vec![]; + let mut lines = text.lines(); + // just push the first line + res.extend(lines.next().map(|it| it.to_string())); + // for other lines, strip common whitespace prefix + for line in lines { + res.push(trim_whitespace_prefix(line, col).to_string()) } - debug!("<<< consume comment"); -} - -#[derive(Clone)] -pub struct Literal { - pub lit: String, - pub pos: BytePos, + res } // it appears this function is called only from pprust... that's // probably not a good thing. -pub fn gather_comments_and_literals(sess: &ParseSess, path: FileName, srdr: &mut dyn Read) - -> (Vec<Comment>, Vec<Literal>) -{ - let mut src = String::new(); - srdr.read_to_string(&mut src).unwrap(); +pub fn gather_comments(sess: &ParseSess, path: FileName, src: String) -> Vec<Comment> { let cm = SourceMap::new(sess.source_map().path_mapping().clone()); let source_file = cm.new_source_file(path, src); - let mut rdr = lexer::StringReader::new_raw(sess, source_file, None); + let text = (*source_file.src.as_ref().unwrap()).clone(); + let text: &str = text.as_str(); + let start_bpos = source_file.start_pos; + let mut pos = 0; let mut comments: Vec<Comment> = Vec::new(); - let mut literals: Vec<Literal> = Vec::new(); - let mut code_to_the_left = false; // Only code - let mut anything_to_the_left = false; // Code or comments + let mut code_to_the_left = false; - while !rdr.is_eof() { - loop { - // Eat all the whitespace and count blank lines. - rdr.consume_non_eol_whitespace(); - if rdr.ch_is('\n') { - if anything_to_the_left { - rdr.bump(); // The line is not blank, do not count. + if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { + comments.push(Comment { + style: Isolated, + lines: vec![text[..shebang_len].to_string()], + pos: start_bpos, + }); + pos += shebang_len; + } + + for token in rustc_lexer::tokenize(&text[pos..]) { + let token_text = &text[pos..pos + token.len]; + match token.kind { + rustc_lexer::TokenKind::Whitespace => { + if let Some(mut idx) = token_text.find('\n') { + code_to_the_left = false; + while let Some(next_newline) = &token_text[idx + 1..].find('\n') { + idx = idx + 1 + next_newline; + comments.push(Comment { + style: BlankLine, + lines: vec![], + pos: start_bpos + BytePos((pos + idx) as u32), + }); + } } - consume_whitespace_counting_blank_lines(&mut rdr, &mut comments); - code_to_the_left = false; - anything_to_the_left = false; } - // Eat one comment group - if rdr.peeking_at_comment() { - consume_comment(&mut rdr, &mut comments, - &mut code_to_the_left, &mut anything_to_the_left); - } else { - break + rustc_lexer::TokenKind::BlockComment { terminated: _ } => { + if !is_block_doc_comment(token_text) { + let code_to_the_right = match text[pos + token.len..].chars().next() { + Some('\r') | Some('\n') => false, + _ => true, + }; + let style = match (code_to_the_left, code_to_the_right) { + (true, true) | (false, true) => Mixed, + (false, false) => Isolated, + (true, false) => Trailing, + }; + + // Count the number of chars since the start of the line by rescanning. + let pos_in_file = start_bpos + BytePos(pos as u32); + let line_begin_in_file = source_file.line_begin_pos(pos_in_file); + let line_begin_pos = (line_begin_in_file - start_bpos).to_usize(); + let col = CharPos(text[line_begin_pos..pos].chars().count()); + + let lines = split_block_comment_into_lines(token_text, col); + comments.push(Comment { style, lines, pos: pos_in_file }) + } + } + rustc_lexer::TokenKind::LineComment => { + if !is_doc_comment(token_text) { + comments.push(Comment { + style: if code_to_the_left { Trailing } else { Isolated }, + lines: vec![token_text.to_string()], + pos: start_bpos + BytePos(pos as u32), + }) + } + } + _ => { + code_to_the_left = true; } } - - let bstart = rdr.pos; - rdr.next_token(); - // discard, and look ahead; we're working with internal state - let TokenAndSpan { tok, sp } = rdr.peek(); - if tok.is_lit() { - rdr.with_str_from(bstart, |s| { - debug!("tok lit: {}", s); - literals.push(Literal { - lit: s.to_string(), - pos: sp.lo(), - }); - }) - } else { - debug!("tok: {}", pprust::token_to_string(&tok)); - } - code_to_the_left = true; - anything_to_the_left = true; + pos += token.len; } - (comments, literals) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_block_doc_comment_1() { - let comment = "/**\n * Test \n ** Test\n * Test\n*/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " Test \n* Test\n Test"); - } - - #[test] - fn test_block_doc_comment_2() { - let comment = "/**\n * Test\n * Test\n*/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " Test\n Test"); - } - - #[test] - fn test_block_doc_comment_3() { - let comment = "/**\n let a: *i32;\n *a = 5;\n*/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " let a: *i32;\n *a = 5;"); - } - - #[test] - fn test_block_doc_comment_4() { - let comment = "/*******************\n test\n *********************/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " test"); - } - - #[test] - fn test_line_doc_comment() { - let stripped = strip_doc_comment_decoration("/// test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("///! test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("// test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("// test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("///test"); - assert_eq!(stripped, "test"); - let stripped = strip_doc_comment_decoration("///!test"); - assert_eq!(stripped, "test"); - let stripped = strip_doc_comment_decoration("//test"); - assert_eq!(stripped, "test"); - } + comments } diff --git a/src/libsyntax/parse/lexer/comments/tests.rs b/src/libsyntax/parse/lexer/comments/tests.rs new file mode 100644 index 00000000000..f9cd69fb50d --- /dev/null +++ b/src/libsyntax/parse/lexer/comments/tests.rs @@ -0,0 +1,47 @@ +use super::*; + +#[test] +fn test_block_doc_comment_1() { + let comment = "/**\n * Test \n ** Test\n * Test\n*/"; + let stripped = strip_doc_comment_decoration(comment); + assert_eq!(stripped, " Test \n* Test\n Test"); +} + +#[test] +fn test_block_doc_comment_2() { + let comment = "/**\n * Test\n * Test\n*/"; + let stripped = strip_doc_comment_decoration(comment); + assert_eq!(stripped, " Test\n Test"); +} + +#[test] +fn test_block_doc_comment_3() { + let comment = "/**\n let a: *i32;\n *a = 5;\n*/"; + let stripped = strip_doc_comment_decoration(comment); + assert_eq!(stripped, " let a: *i32;\n *a = 5;"); +} + +#[test] +fn test_block_doc_comment_4() { + let comment = "/*******************\n test\n *********************/"; + let stripped = strip_doc_comment_decoration(comment); + assert_eq!(stripped, " test"); +} + +#[test] +fn test_line_doc_comment() { + let stripped = strip_doc_comment_decoration("/// test"); + assert_eq!(stripped, " test"); + let stripped = strip_doc_comment_decoration("///! test"); + assert_eq!(stripped, " test"); + let stripped = strip_doc_comment_decoration("// test"); + assert_eq!(stripped, " test"); + let stripped = strip_doc_comment_decoration("// test"); + assert_eq!(stripped, " test"); + let stripped = strip_doc_comment_decoration("///test"); + assert_eq!(stripped, "test"); + let stripped = strip_doc_comment_decoration("///!test"); + assert_eq!(stripped, "test"); + let stripped = strip_doc_comment_decoration("//test"); + assert_eq!(stripped, "test"); +} diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 01e3b292903..e86d4c7fde6 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1,39 +1,28 @@ -use crate::ast::{self, Ident}; -use crate::source_map::{SourceMap, FilePathMapping}; -use crate::parse::{token, ParseSess}; -use crate::symbol::{Symbol, keywords}; +use crate::parse::ParseSess; +use crate::parse::token::{self, Token, TokenKind}; +use crate::symbol::{sym, Symbol}; +use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char}; -use errors::{Applicability, FatalError, Diagnostic, DiagnosticBuilder}; -use syntax_pos::{BytePos, CharPos, Pos, Span, NO_EXPANSION}; -use core::unicode::property::Pattern_White_Space; +use errors::{FatalError, DiagnosticBuilder}; +use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION}; +use rustc_lexer::Base; +use rustc_lexer::unescape; use std::borrow::Cow; use std::char; use std::iter; -use std::mem::replace; +use std::convert::TryInto; use rustc_data_structures::sync::Lrc; use log::debug; +#[cfg(test)] +mod tests; + pub mod comments; mod tokentrees; mod unicode_chars; #[derive(Clone, Debug)] -pub struct TokenAndSpan { - pub tok: token::Token, - pub sp: Span, -} - -impl Default for TokenAndSpan { - fn default() -> Self { - TokenAndSpan { - tok: token::Whitespace, - sp: syntax_pos::DUMMY_SP, - } - } -} - -#[derive(Clone, Debug)] pub struct UnmatchedBrace { pub expected_delim: token::DelimToken, pub found_delim: token::DelimToken, @@ -43,193 +32,22 @@ pub struct UnmatchedBrace { } pub struct StringReader<'a> { - pub sess: &'a ParseSess, - /// The absolute offset within the source_map of the next character to read - pub next_pos: BytePos, - /// The absolute offset within the source_map of the current character - pub pos: BytePos, - /// The current character (which has been read from self.pos) - pub ch: Option<char>, - pub source_file: Lrc<syntax_pos::SourceFile>, + sess: &'a ParseSess, + /// Initial position, read-only. + start_pos: BytePos, + /// The absolute offset within the source_map of the current character. + pos: BytePos, /// Stop reading src at this index. - pub end_src_index: usize, - // cached: - peek_tok: token::Token, - peek_span: Span, - peek_span_src_raw: Span, - fatal_errs: Vec<DiagnosticBuilder<'a>>, - // cache a direct reference to the source text, so that we don't have to - // retrieve it via `self.source_file.src.as_ref().unwrap()` all the time. + end_src_index: usize, + /// Source text to tokenize. src: Lrc<String>, - token: token::Token, - span: Span, - /// The raw source span which *does not* take `override_span` into account - span_src_raw: Span, - /// Stack of open delimiters and their spans. Used for error message. - open_braces: Vec<(token::DelimToken, Span)>, - crate unmatched_braces: Vec<UnmatchedBrace>, - /// The type and spans for all braces - /// - /// Used only for error recovery when arriving to EOF with mismatched braces. - matching_delim_spans: Vec<(token::DelimToken, Span, Span)>, - crate override_span: Option<Span>, - last_unclosed_found_span: Option<Span>, + override_span: Option<Span>, } impl<'a> StringReader<'a> { - fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span { - self.mk_sp_and_raw(lo, hi).0 - } - - fn mk_sp_and_raw(&self, lo: BytePos, hi: BytePos) -> (Span, Span) { - let raw = Span::new(lo, hi, NO_EXPANSION); - let real = self.override_span.unwrap_or(raw); - - (real, raw) - } - - fn mk_ident(&self, string: &str) -> Ident { - let mut ident = Ident::from_str(string); - if let Some(span) = self.override_span { - ident.span = span; - } - - ident - } - - fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan { - match res { - Ok(tok) => tok, - Err(_) => { - self.emit_fatal_errors(); - FatalError.raise(); - } - } - } - - fn next_token(&mut self) -> TokenAndSpan where Self: Sized { - let res = self.try_next_token(); - self.unwrap_or_abort(res) - } - - /// Returns the next token. EFFECT: advances the string_reader. - pub fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> { - assert!(self.fatal_errs.is_empty()); - let ret_val = TokenAndSpan { - tok: replace(&mut self.peek_tok, token::Whitespace), - sp: self.peek_span, - }; - self.advance_token()?; - self.span_src_raw = self.peek_span_src_raw; - - Ok(ret_val) - } - - /// Immutably extract string if found at current position with given delimiters - pub fn peek_delimited(&self, from_ch: char, to_ch: char) -> Option<String> { - let mut pos = self.pos; - let mut idx = self.src_index(pos); - let mut ch = char_at(&self.src, idx); - if ch != from_ch { - return None; - } - pos = pos + Pos::from_usize(ch.len_utf8()); - let start_pos = pos; - idx = self.src_index(pos); - while idx < self.end_src_index { - ch = char_at(&self.src, idx); - if ch == to_ch { - return Some(self.src[self.src_index(start_pos)..self.src_index(pos)].to_string()); - } - pos = pos + Pos::from_usize(ch.len_utf8()); - idx = self.src_index(pos); - } - return None; - } - - fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> { - let mut t = self.try_next_token()?; - loop { - match t.tok { - token::Whitespace | token::Comment | token::Shebang(_) => { - t = self.try_next_token()?; - } - _ => break, - } - } - - self.token = t.tok.clone(); - self.span = t.sp; - - Ok(t) - } - - pub fn real_token(&mut self) -> TokenAndSpan { - let res = self.try_real_token(); - self.unwrap_or_abort(res) - } - - #[inline] - fn is_eof(&self) -> bool { - self.ch.is_none() - } - - fn fail_unterminated_raw_string(&self, pos: BytePos, hash_count: u16) { - let mut err = self.struct_span_fatal(pos, pos, "unterminated raw string"); - err.span_label(self.mk_sp(pos, pos), "unterminated raw string"); - - if hash_count > 0 { - err.note(&format!("this raw string should be terminated with `\"{}`", - "#".repeat(hash_count as usize))); - } - - err.emit(); - FatalError.raise(); - } - - fn fatal(&self, m: &str) -> FatalError { - self.fatal_span(self.peek_span, m) - } - - pub fn emit_fatal_errors(&mut self) { - for err in &mut self.fatal_errs { - err.emit(); - } - - self.fatal_errs.clear(); - } - - pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> { - let mut buffer = Vec::new(); - - for err in self.fatal_errs.drain(..) { - err.buffer(&mut buffer); - } - - buffer - } - - pub fn peek(&self) -> TokenAndSpan { - // FIXME(pcwalton): Bad copy! - TokenAndSpan { - tok: self.peek_tok.clone(), - sp: self.peek_span, - } - } - - /// For comments.rs, which hackily pokes into next_pos and ch - fn new_raw(sess: &'a ParseSess, + pub fn new(sess: &'a ParseSess, source_file: Lrc<syntax_pos::SourceFile>, override_span: Option<Span>) -> Self { - let mut sr = StringReader::new_raw_internal(sess, source_file, override_span); - sr.bump(); - - sr - } - - fn new_raw_internal(sess: &'a ParseSess, source_file: Lrc<syntax_pos::SourceFile>, - override_span: Option<Span>) -> Self - { if source_file.src.is_none() { sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}", source_file.name)); @@ -239,48 +57,11 @@ impl<'a> StringReader<'a> { StringReader { sess, - next_pos: source_file.start_pos, + start_pos: source_file.start_pos, pos: source_file.start_pos, - ch: Some('\n'), - source_file, end_src_index: src.len(), - // dummy values; not read - peek_tok: token::Eof, - peek_span: syntax_pos::DUMMY_SP, - peek_span_src_raw: syntax_pos::DUMMY_SP, src, - fatal_errs: Vec::new(), - token: token::Eof, - span: syntax_pos::DUMMY_SP, - span_src_raw: syntax_pos::DUMMY_SP, - open_braces: Vec::new(), - unmatched_braces: Vec::new(), - matching_delim_spans: Vec::new(), override_span, - last_unclosed_found_span: None, - } - } - - pub fn new(sess: &'a ParseSess, - source_file: Lrc<syntax_pos::SourceFile>, - override_span: Option<Span>) -> Self { - let mut sr = StringReader::new_raw(sess, source_file, override_span); - if sr.advance_token().is_err() { - sr.emit_fatal_errors(); - FatalError.raise(); - } - - sr - } - - pub fn new_or_buffered_errs(sess: &'a ParseSess, - source_file: Lrc<syntax_pos::SourceFile>, - override_span: Option<Span>) -> Result<Self, Vec<Diagnostic>> { - let mut sr = StringReader::new_raw(sess, source_file, override_span); - if sr.advance_token().is_err() { - Err(sr.buffer_fatal_errors()) - } else { - Ok(sr) } } @@ -293,25 +74,61 @@ impl<'a> StringReader<'a> { span = span.shrink_to_lo(); } - let mut sr = StringReader::new_raw_internal(sess, begin.sf, None); + let mut sr = StringReader::new(sess, begin.sf, None); // Seek the lexer to the right byte range. - sr.next_pos = span.lo(); sr.end_src_index = sr.src_index(span.hi()); - sr.bump(); + sr + } - if sr.advance_token().is_err() { - sr.emit_fatal_errors(); - FatalError.raise(); - } - sr + fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span { + self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION)) } - #[inline] - fn ch_is(&self, c: char) -> bool { - self.ch == Some(c) + /// Returns the next token, including trivia like whitespace or comments. + /// + /// `Err(())` means that some errors were encountered, which can be + /// retrieved using `buffer_fatal_errors`. + pub fn next_token(&mut self) -> Token { + let start_src_index = self.src_index(self.pos); + let text: &str = &self.src[start_src_index..self.end_src_index]; + + if text.is_empty() { + let span = self.mk_sp(self.pos, self.pos); + return Token::new(token::Eof, span); + } + + { + let is_beginning_of_file = self.pos == self.start_pos; + if is_beginning_of_file { + if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { + let start = self.pos; + self.pos = self.pos + BytePos::from_usize(shebang_len); + + let sym = self.symbol_from(start + BytePos::from_usize("#!".len())); + let kind = token::Shebang(sym); + + let span = self.mk_sp(start, self.pos); + return Token::new(kind, span); + } + } + } + + let token = rustc_lexer::first_token(text); + + let start = self.pos; + self.pos = self.pos + BytePos::from_usize(token.len); + + debug!("try_next_token: {:?}({:?})", token.kind, self.str_from(start)); + + // This could use `?`, but that makes code significantly (10-20%) slower. + // https://github.com/rust-lang/rust/issues/37939 + let kind = self.cook_lexer_token(token.kind, start); + + let span = self.mk_sp(start, self.pos); + Token::new(kind, span) } /// Report a fatal lexical error with a given span. @@ -335,29 +152,6 @@ impl<'a> StringReader<'a> { self.err_span(self.mk_sp(from_pos, to_pos), m) } - /// Pushes a character to a message string for error reporting - fn push_escaped_char_for_msg(m: &mut String, c: char) { - match c { - '\u{20}'..='\u{7e}' => { - // Don't escape \, ' or " for user-facing messages - m.push(c); - } - _ => { - m.extend(c.escape_default()); - } - } - } - - /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an - /// escaped character to the error message - fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError { - let mut m = m.to_string(); - m.push_str(": "); - Self::push_escaped_char_for_msg(&mut m, c); - - self.fatal_span_(from_pos, to_pos, &m[..]) - } - fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> DiagnosticBuilder<'a> { @@ -369,104 +163,357 @@ impl<'a> StringReader<'a> { { let mut m = m.to_string(); m.push_str(": "); - Self::push_escaped_char_for_msg(&mut m, c); + push_escaped_char(&mut m, c); self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..]) } - /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an - /// escaped character to the error message - fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) { - let mut m = m.to_string(); - m.push_str(": "); - Self::push_escaped_char_for_msg(&mut m, c); - self.err_span_(from_pos, to_pos, &m[..]); - } - - fn struct_err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) - -> DiagnosticBuilder<'a> - { - let mut m = m.to_string(); - m.push_str(": "); - Self::push_escaped_char_for_msg(&mut m, c); + /// Turns simple `rustc_lexer::TokenKind` enum into a rich + /// `libsyntax::TokenKind`. This turns strings into interned + /// symbols and runs additional validation. + fn cook_lexer_token( + &self, + token: rustc_lexer::TokenKind, + start: BytePos, + ) -> TokenKind { + match token { + rustc_lexer::TokenKind::LineComment => { + let string = self.str_from(start); + // comments with only more "/"s are not doc comments + let tok = if is_doc_comment(string) { + let mut idx = 0; + loop { + idx = match string[idx..].find('\r') { + None => break, + Some(it) => idx + it + 1 + }; + if string[idx..].chars().next() != Some('\n') { + self.err_span_(start + BytePos(idx as u32 - 1), + start + BytePos(idx as u32), + "bare CR not allowed in doc-comment"); + } + } + token::DocComment(Symbol::intern(string)) + } else { + token::Comment + }; - self.sess.span_diagnostic.struct_span_err(self.mk_sp(from_pos, to_pos), &m[..]) - } + tok + } + rustc_lexer::TokenKind::BlockComment { terminated } => { + let string = self.str_from(start); + // block comments starting with "/**" or "/*!" are doc-comments + // but comments with only "*"s between two "/"s are not + let is_doc_comment = is_block_doc_comment(string); - /// Report a lexical error spanning [`from_pos`, `to_pos`), appending the - /// offending string to the error message - fn fatal_span_verbose(&self, from_pos: BytePos, to_pos: BytePos, mut m: String) -> FatalError { - m.push_str(": "); - m.push_str(&self.src[self.src_index(from_pos)..self.src_index(to_pos)]); + if !terminated { + let msg = if is_doc_comment { + "unterminated block doc-comment" + } else { + "unterminated block comment" + }; + let last_bpos = self.pos; + self.fatal_span_(start, last_bpos, msg).raise(); + } - self.fatal_span_(from_pos, to_pos, &m[..]) - } + let tok = if is_doc_comment { + let has_cr = string.contains('\r'); + let string = if has_cr { + self.translate_crlf(start, + string, + "bare CR not allowed in block doc-comment") + } else { + string.into() + }; + token::DocComment(Symbol::intern(&string[..])) + } else { + token::Comment + }; - /// Advance peek_tok and peek_span to refer to the next token, and - /// possibly update the interner. - fn advance_token(&mut self) -> Result<(), ()> { - match self.scan_whitespace_or_comment() { - Some(comment) => { - self.peek_span_src_raw = comment.sp; - self.peek_span = comment.sp; - self.peek_tok = comment.tok; + tok } - None => { - if self.is_eof() { - self.peek_tok = token::Eof; - let (real, raw) = self.mk_sp_and_raw( - self.source_file.end_pos, - self.source_file.end_pos, - ); - self.peek_span = real; - self.peek_span_src_raw = raw; + rustc_lexer::TokenKind::Whitespace => token::Whitespace, + rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => { + let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent; + let mut ident_start = start; + if is_raw_ident { + ident_start = ident_start + BytePos(2); + } + // FIXME: perform NFKC normalization here. (Issue #2253) + let sym = self.symbol_from(ident_start); + if is_raw_ident { + let span = self.mk_sp(start, self.pos); + if !sym.can_be_raw() { + self.err_span(span, &format!("`{}` cannot be a raw identifier", sym)); + } + self.sess.raw_identifier_spans.borrow_mut().push(span); + } + token::Ident(sym, is_raw_ident) + } + rustc_lexer::TokenKind::Literal { kind, suffix_start } => { + let suffix_start = start + BytePos(suffix_start as u32); + let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind); + let suffix = if suffix_start < self.pos { + let string = self.str_from(suffix_start); + if string == "_" { + self.sess.span_diagnostic + .struct_span_warn(self.mk_sp(suffix_start, self.pos), + "underscore literal suffix is not allowed") + .warn("this was previously accepted by the compiler but is \ + being phased out; it will become a hard error in \ + a future release!") + .note("for more information, see issue #42326 \ + <https://github.com/rust-lang/rust/issues/42326>") + .emit(); + None + } else { + Some(Symbol::intern(string)) + } } else { - let start_bytepos = self.pos; - self.peek_tok = self.next_token_inner()?; - let (real, raw) = self.mk_sp_and_raw(start_bytepos, self.pos); - self.peek_span = real; - self.peek_span_src_raw = raw; + None }; + token::Literal(token::Lit { kind, symbol, suffix }) + } + rustc_lexer::TokenKind::Lifetime { starts_with_number } => { + // Include the leading `'` in the real identifier, for macro + // expansion purposes. See #12512 for the gory details of why + // this is necessary. + let lifetime_name = self.str_from(start); + if starts_with_number { + self.err_span_( + start, + self.pos, + "lifetimes cannot start with a number", + ); + } + let ident = Symbol::intern(lifetime_name); + token::Lifetime(ident) + } + rustc_lexer::TokenKind::Semi => token::Semi, + rustc_lexer::TokenKind::Comma => token::Comma, + rustc_lexer::TokenKind::DotDotDot => token::DotDotDot, + rustc_lexer::TokenKind::DotDotEq => token::DotDotEq, + rustc_lexer::TokenKind::DotDot => token::DotDot, + rustc_lexer::TokenKind::Dot => token::Dot, + rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren), + rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren), + rustc_lexer::TokenKind::OpenBrace => token::OpenDelim(token::Brace), + rustc_lexer::TokenKind::CloseBrace => token::CloseDelim(token::Brace), + rustc_lexer::TokenKind::OpenBracket => token::OpenDelim(token::Bracket), + rustc_lexer::TokenKind::CloseBracket => token::CloseDelim(token::Bracket), + rustc_lexer::TokenKind::At => token::At, + rustc_lexer::TokenKind::Pound => token::Pound, + rustc_lexer::TokenKind::Tilde => token::Tilde, + rustc_lexer::TokenKind::Question => token::Question, + rustc_lexer::TokenKind::ColonColon => token::ModSep, + rustc_lexer::TokenKind::Colon => token::Colon, + rustc_lexer::TokenKind::Dollar => token::Dollar, + rustc_lexer::TokenKind::EqEq => token::EqEq, + rustc_lexer::TokenKind::Eq => token::Eq, + rustc_lexer::TokenKind::FatArrow => token::FatArrow, + rustc_lexer::TokenKind::Ne => token::Ne, + rustc_lexer::TokenKind::Not => token::Not, + rustc_lexer::TokenKind::Le => token::Le, + rustc_lexer::TokenKind::LArrow => token::LArrow, + rustc_lexer::TokenKind::Lt => token::Lt, + rustc_lexer::TokenKind::ShlEq => token::BinOpEq(token::Shl), + rustc_lexer::TokenKind::Shl => token::BinOp(token::Shl), + rustc_lexer::TokenKind::Ge => token::Ge, + rustc_lexer::TokenKind::Gt => token::Gt, + rustc_lexer::TokenKind::ShrEq => token::BinOpEq(token::Shr), + rustc_lexer::TokenKind::Shr => token::BinOp(token::Shr), + rustc_lexer::TokenKind::RArrow => token::RArrow, + rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus), + rustc_lexer::TokenKind::MinusEq => token::BinOpEq(token::Minus), + rustc_lexer::TokenKind::And => token::BinOp(token::And), + rustc_lexer::TokenKind::AndEq => token::BinOpEq(token::And), + rustc_lexer::TokenKind::AndAnd => token::AndAnd, + rustc_lexer::TokenKind::Or => token::BinOp(token::Or), + rustc_lexer::TokenKind::OrEq => token::BinOpEq(token::Or), + rustc_lexer::TokenKind::OrOr => token::OrOr, + rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus), + rustc_lexer::TokenKind::PlusEq => token::BinOpEq(token::Plus), + rustc_lexer::TokenKind::Star => token::BinOp(token::Star), + rustc_lexer::TokenKind::StarEq => token::BinOpEq(token::Star), + rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash), + rustc_lexer::TokenKind::SlashEq => token::BinOpEq(token::Slash), + rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret), + rustc_lexer::TokenKind::CaretEq => token::BinOpEq(token::Caret), + rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent), + rustc_lexer::TokenKind::PercentEq => token::BinOpEq(token::Percent), + + rustc_lexer::TokenKind::Unknown => { + let c = self.str_from(start).chars().next().unwrap(); + let mut err = self.struct_fatal_span_char(start, + self.pos, + "unknown start of token", + c); + // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, + // instead of keeping a table in `check_for_substitution`into the token. Ideally, + // this should be inside `rustc_lexer`. However, we should first remove compound + // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it, + // as there will be less overall work to do this way. + let token = unicode_chars::check_for_substitution(self, start, c, &mut err) + .unwrap_or_else(|| token::Unknown(self.symbol_from(start))); + err.emit(); + token } } + } - Ok(()) + fn cook_lexer_literal( + &self, + start: BytePos, + suffix_start: BytePos, + kind: rustc_lexer::LiteralKind + ) -> (token::LitKind, Symbol) { + match kind { + rustc_lexer::LiteralKind::Char { terminated } => { + if !terminated { + self.fatal_span_(start, suffix_start, + "unterminated character literal".into()) + .raise() + } + let content_start = start + BytePos(1); + let content_end = suffix_start - BytePos(1); + self.validate_char_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::Char, id) + }, + rustc_lexer::LiteralKind::Byte { terminated } => { + if !terminated { + self.fatal_span_(start + BytePos(1), suffix_start, + "unterminated byte constant".into()) + .raise() + } + let content_start = start + BytePos(2); + let content_end = suffix_start - BytePos(1); + self.validate_byte_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::Byte, id) + }, + rustc_lexer::LiteralKind::Str { terminated } => { + if !terminated { + self.fatal_span_(start, suffix_start, + "unterminated double quote string".into()) + .raise() + } + let content_start = start + BytePos(1); + let content_end = suffix_start - BytePos(1); + self.validate_str_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::Str, id) + } + rustc_lexer::LiteralKind::ByteStr { terminated } => { + if !terminated { + self.fatal_span_(start + BytePos(1), suffix_start, + "unterminated double quote byte string".into()) + .raise() + } + let content_start = start + BytePos(2); + let content_end = suffix_start - BytePos(1); + self.validate_byte_str_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::ByteStr, id) + } + rustc_lexer::LiteralKind::RawStr { n_hashes, started, terminated } => { + if !started { + self.report_non_started_raw_string(start); + } + if !terminated { + self.report_unterminated_raw_string(start, n_hashes) + } + let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes); + let n = u32::from(n_hashes); + let content_start = start + BytePos(2 + n); + let content_end = suffix_start - BytePos(1 + n); + self.validate_raw_str_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::StrRaw(n_hashes), id) + } + rustc_lexer::LiteralKind::RawByteStr { n_hashes, started, terminated } => { + if !started { + self.report_non_started_raw_string(start); + } + if !terminated { + self.report_unterminated_raw_string(start, n_hashes) + } + let n_hashes: u16 = self.restrict_n_hashes(start, n_hashes); + let n = u32::from(n_hashes); + let content_start = start + BytePos(3 + n); + let content_end = suffix_start - BytePos(1 + n); + self.validate_raw_byte_str_escape(content_start, content_end); + let id = self.symbol_from_to(content_start, content_end); + (token::ByteStrRaw(n_hashes), id) + } + rustc_lexer::LiteralKind::Int { base, empty_int } => { + if empty_int { + self.err_span_(start, suffix_start, "no valid digits found for number"); + (token::Integer, sym::integer(0)) + } else { + self.validate_int_literal(base, start, suffix_start); + (token::Integer, self.symbol_from_to(start, suffix_start)) + } + }, + rustc_lexer::LiteralKind::Float { base, empty_exponent } => { + if empty_exponent { + let mut err = self.struct_span_fatal( + start, self.pos, + "expected at least one digit in exponent" + ); + err.emit(); + } + + match base { + Base::Hexadecimal => { + self.err_span_(start, suffix_start, + "hexadecimal float literal is not supported") + } + Base::Octal => { + self.err_span_(start, suffix_start, + "octal float literal is not supported") + } + Base::Binary => { + self.err_span_(start, suffix_start, + "binary float literal is not supported") + } + _ => () + } + + let id = self.symbol_from_to(start, suffix_start); + (token::Float, id) + }, + } } #[inline] fn src_index(&self, pos: BytePos) -> usize { - (pos - self.source_file.start_pos).to_usize() + (pos - self.start_pos).to_usize() } - /// Calls `f` with a string slice of the source text spanning from `start` - /// up to but excluding `self.pos`, meaning the slice does not include - /// the character `self.ch`. - fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T - where F: FnOnce(&str) -> T + /// Slice of the source text from `start` up to but excluding `self.pos`, + /// meaning the slice does not include the character `self.ch`. + fn str_from(&self, start: BytePos) -> &str { - self.with_str_from_to(start, self.pos, f) + self.str_from_to(start, self.pos) } - /// Creates a Name from a given offset to the current offset, each - /// adjusted 1 towards each other (assumes that on either side there is a - /// single-byte delimiter). - fn name_from(&self, start: BytePos) -> ast::Name { + /// Creates a Symbol from a given offset to the current offset. + fn symbol_from(&self, start: BytePos) -> Symbol { debug!("taking an ident from {:?} to {:?}", start, self.pos); - self.with_str_from(start, Symbol::intern) + Symbol::intern(self.str_from(start)) } - /// As name_from, with an explicit endpoint. - fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name { + /// As symbol_from, with an explicit endpoint. + fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol { debug!("taking an ident from {:?} to {:?}", start, end); - self.with_str_from_to(start, end, Symbol::intern) + Symbol::intern(self.str_from_to(start, end)) } - /// Calls `f` with a string slice of the source text spanning from `start` - /// up to but excluding `end`. - fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T - where F: FnOnce(&str) -> T + /// Slice of the source text spanning from `start` up to but excluding `end`. + fn str_from_to(&self, start: BytePos, end: BytePos) -> &str { - f(&self.src[self.src_index(start)..self.src_index(end)]) + &self.src[self.src_index(start)..self.src_index(end)] } /// Converts CRLF to LF in the given string, raising an error on bare CR. @@ -515,1360 +562,163 @@ impl<'a> StringReader<'a> { } } - /// Advance the StringReader by one character. - crate fn bump(&mut self) { - let next_src_index = self.src_index(self.next_pos); - if next_src_index < self.end_src_index { - let next_ch = char_at(&self.src, next_src_index); - let next_ch_len = next_ch.len_utf8(); - - self.ch = Some(next_ch); - self.pos = self.next_pos; - self.next_pos = self.next_pos + Pos::from_usize(next_ch_len); - } else { - self.ch = None; - self.pos = self.next_pos; - } - } - - fn nextch(&self) -> Option<char> { - let next_src_index = self.src_index(self.next_pos); - if next_src_index < self.end_src_index { - Some(char_at(&self.src, next_src_index)) - } else { - None - } - } - - #[inline] - fn nextch_is(&self, c: char) -> bool { - self.nextch() == Some(c) - } - - fn nextnextch(&self) -> Option<char> { - let next_src_index = self.src_index(self.next_pos); - if next_src_index < self.end_src_index { - let next_next_src_index = - next_src_index + char_at(&self.src, next_src_index).len_utf8(); - if next_next_src_index < self.end_src_index { - return Some(char_at(&self.src, next_next_src_index)); - } + fn report_non_started_raw_string(&self, start: BytePos) -> ! { + let bad_char = self.str_from(start).chars().last().unwrap(); + self + .struct_fatal_span_char( + start, + self.pos, + "found invalid character; only `#` is allowed \ + in raw string delimitation", + bad_char, + ) + .emit(); + FatalError.raise() + } + + fn report_unterminated_raw_string(&self, start: BytePos, n_hashes: usize) -> ! { + let mut err = self.struct_span_fatal( + start, start, + "unterminated raw string", + ); + err.span_label( + self.mk_sp(start, start), + "unterminated raw string", + ); + + if n_hashes > 0 { + err.note(&format!("this raw string should be terminated with `\"{}`", + "#".repeat(n_hashes as usize))); } - None - } - #[inline] - fn nextnextch_is(&self, c: char) -> bool { - self.nextnextch() == Some(c) + err.emit(); + FatalError.raise() } - /// Eats <XID_start><XID_continue>*, if possible. - fn scan_optional_raw_name(&mut self) -> Option<ast::Name> { - if !ident_start(self.ch) { - return None; - } - - let start = self.pos; - self.bump(); - - while ident_continue(self.ch) { - self.bump(); - } - - self.with_str_from(start, |string| { - if string == "_" { - self.sess.span_diagnostic - .struct_span_warn(self.mk_sp(start, self.pos), - "underscore literal suffix is not allowed") - .warn("this was previously accepted by the compiler but is \ - being phased out; it will become a hard error in \ - a future release!") - .note("for more information, see issue #42326 \ - <https://github.com/rust-lang/rust/issues/42326>") - .emit(); - None - } else { - Some(Symbol::intern(string)) + fn restrict_n_hashes(&self, start: BytePos, n_hashes: usize) -> u16 { + match n_hashes.try_into() { + Ok(n_hashes) => n_hashes, + Err(_) => { + self.fatal_span_(start, + self.pos, + "too many `#` symbols: raw strings may be \ + delimited by up to 65535 `#` symbols").raise(); + } + } + } + + fn validate_char_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + if let Err((off, err)) = unescape::unescape_char(lit) { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Char, + 0..off, + err, + ) + } + } + + fn validate_byte_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + if let Err((off, err)) = unescape::unescape_byte(lit) { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Byte, + 0..off, + err, + ) + } + } + + fn validate_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Str, + range, + err, + ) } }) } - /// PRECONDITION: self.ch is not whitespace - /// Eats any kind of comment. - fn scan_comment(&mut self) -> Option<TokenAndSpan> { - if let Some(c) = self.ch { - if c.is_whitespace() { - let msg = "called consume_any_line_comment, but there was whitespace"; - self.sess.span_diagnostic.span_err(self.mk_sp(self.pos, self.pos), msg); - } - } - - if self.ch_is('/') { - match self.nextch() { - Some('/') => { - self.bump(); - self.bump(); - - // line comments starting with "///" or "//!" are doc-comments - let doc_comment = (self.ch_is('/') && !self.nextch_is('/')) || self.ch_is('!'); - let start_bpos = self.pos - BytePos(2); - - while !self.is_eof() { - match self.ch.unwrap() { - '\n' => break, - '\r' => { - if self.nextch_is('\n') { - // CRLF - break; - } else if doc_comment { - self.err_span_(self.pos, - self.next_pos, - "bare CR not allowed in doc-comment"); - } - } - _ => (), - } - self.bump(); - } - - if doc_comment { - self.with_str_from(start_bpos, |string| { - // comments with only more "/"s are not doc comments - let tok = if is_doc_comment(string) { - token::DocComment(Symbol::intern(string)) - } else { - token::Comment - }; - - Some(TokenAndSpan { - tok, - sp: self.mk_sp(start_bpos, self.pos), - }) - }) - } else { - Some(TokenAndSpan { - tok: token::Comment, - sp: self.mk_sp(start_bpos, self.pos), - }) - } - } - Some('*') => { - self.bump(); - self.bump(); - self.scan_block_comment() - } - _ => None, - } - } else if self.ch_is('#') { - if self.nextch_is('!') { - - // Parse an inner attribute. - if self.nextnextch_is('[') { - return None; - } - - // I guess this is the only way to figure out if - // we're at the beginning of the file... - let smap = SourceMap::new(FilePathMapping::empty()); - smap.files.borrow_mut().source_files.push(self.source_file.clone()); - let loc = smap.lookup_char_pos_adj(self.pos); - debug!("Skipping a shebang"); - if loc.line == 1 && loc.col == CharPos(0) { - // FIXME: Add shebang "token", return it - let start = self.pos; - while !self.ch_is('\n') && !self.is_eof() { - self.bump(); - } - return Some(TokenAndSpan { - tok: token::Shebang(self.name_from(start)), - sp: self.mk_sp(start, self.pos), - }); - } + fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_raw_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::Str, + range, + err, + ) } - None - } else { - None - } - } - - /// If there is whitespace, shebang, or a comment, scan it. Otherwise, - /// return `None`. - fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> { - match self.ch.unwrap_or('\0') { - // # to handle shebang at start of file -- this is the entry point - // for skipping over all "junk" - '/' | '#' => { - let c = self.scan_comment(); - debug!("scanning a comment {:?}", c); - c - }, - c if is_pattern_whitespace(Some(c)) => { - let start_bpos = self.pos; - while is_pattern_whitespace(self.ch) { - self.bump(); - } - let c = Some(TokenAndSpan { - tok: token::Whitespace, - sp: self.mk_sp(start_bpos, self.pos), - }); - debug!("scanning whitespace: {:?}", c); - c - } - _ => None, - } - } - - /// Might return a sugared-doc-attr - fn scan_block_comment(&mut self) -> Option<TokenAndSpan> { - // block comments starting with "/**" or "/*!" are doc-comments - let is_doc_comment = self.ch_is('*') || self.ch_is('!'); - let start_bpos = self.pos - BytePos(2); - - let mut level: isize = 1; - let mut has_cr = false; - while level > 0 { - if self.is_eof() { - let msg = if is_doc_comment { - "unterminated block doc-comment" - } else { - "unterminated block comment" - }; - let last_bpos = self.pos; - self.fatal_span_(start_bpos, last_bpos, msg).raise(); - } - let n = self.ch.unwrap(); - match n { - '/' if self.nextch_is('*') => { - level += 1; - self.bump(); - } - '*' if self.nextch_is('/') => { - level -= 1; - self.bump(); - } - '\r' => { - has_cr = true; - } - _ => (), - } - self.bump(); - } - - self.with_str_from(start_bpos, |string| { - // but comments with only "*"s between two "/"s are not - let tok = if is_block_doc_comment(string) { - let string = if has_cr { - self.translate_crlf(start_bpos, - string, - "bare CR not allowed in block doc-comment") - } else { - string.into() - }; - token::DocComment(Symbol::intern(&string[..])) - } else { - token::Comment - }; - - Some(TokenAndSpan { - tok, - sp: self.mk_sp(start_bpos, self.pos), - }) }) } - /// Scan through any digits (base `scan_radix`) or underscores, - /// and return how many digits there were. - /// - /// `real_radix` represents the true radix of the number we're - /// interested in, and errors will be emitted for any digits - /// between `real_radix` and `scan_radix`. - fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize { - assert!(real_radix <= scan_radix); - let mut len = 0; - - loop { - let c = self.ch; - if c == Some('_') { - debug!("skipping a _"); - self.bump(); - continue; - } - match c.and_then(|cc| cc.to_digit(scan_radix)) { - Some(_) => { - debug!("{:?} in scan_digits", c); - // check that the hypothetical digit is actually - // in range for the true radix - if c.unwrap().to_digit(real_radix).is_none() { - self.err_span_(self.pos, - self.next_pos, - &format!("invalid digit for a base {} literal", real_radix)); - } - len += 1; - self.bump(); - } - _ => return len, - } - } - } - - /// Lex a LIT_INTEGER or a LIT_FLOAT - fn scan_number(&mut self, c: char) -> token::Lit { - let mut base = 10; - let start_bpos = self.pos; - self.bump(); - - let num_digits = if c == '0' { - match self.ch.unwrap_or('\0') { - 'b' => { - self.bump(); - base = 2; - self.scan_digits(2, 10) - } - 'o' => { - self.bump(); - base = 8; - self.scan_digits(8, 10) - } - 'x' => { - self.bump(); - base = 16; - self.scan_digits(16, 16) - } - '0'..='9' | '_' | '.' | 'e' | 'E' => { - self.scan_digits(10, 10) + 1 - } - _ => { - // just a 0 - return token::Integer(self.name_from(start_bpos)); - } - } - } else if c.is_digit(10) { - self.scan_digits(10, 10) + 1 - } else { - 0 - }; - - if num_digits == 0 { - self.err_span_(start_bpos, self.pos, "no valid digits found for number"); - - return token::Integer(Symbol::intern("0")); - } - - // might be a float, but don't be greedy if this is actually an - // integer literal followed by field/method access or a range pattern - // (`0..2` and `12.foo()`) - if self.ch_is('.') && !self.nextch_is('.') && - !ident_start(self.nextch()) { - // might have stuff after the ., and if it does, it needs to start - // with a number - self.bump(); - if self.ch.unwrap_or('\0').is_digit(10) { - self.scan_digits(10, 10); - self.scan_float_exponent(); - } - let pos = self.pos; - self.check_float_base(start_bpos, pos, base); - - token::Float(self.name_from(start_bpos)) - } else { - // it might be a float if it has an exponent - if self.ch_is('e') || self.ch_is('E') { - self.scan_float_exponent(); - let pos = self.pos; - self.check_float_base(start_bpos, pos, base); - return token::Float(self.name_from(start_bpos)); - } - // but we certainly have an integer! - token::Integer(self.name_from(start_bpos)) - } - } - - /// Scan over `n_digits` hex digits, stopping at `delim`, reporting an - /// error if too many or too few digits are encountered. - fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool { - debug!("scanning {} digits until {:?}", n_digits, delim); - let start_bpos = self.pos; - let mut accum_int = 0; - - let mut valid = true; - for _ in 0..n_digits { - if self.is_eof() { - let last_bpos = self.pos; - self.fatal_span_(start_bpos, - last_bpos, - "unterminated numeric character escape").raise(); - } - if self.ch_is(delim) { - let last_bpos = self.pos; - self.err_span_(start_bpos, - last_bpos, - "numeric character escape is too short"); - valid = false; - break; - } - let c = self.ch.unwrap_or('\x00'); - accum_int *= 16; - accum_int += c.to_digit(16).unwrap_or_else(|| { - self.err_span_char(self.pos, - self.next_pos, - "invalid character in numeric character escape", - c); - - valid = false; - 0 - }); - self.bump(); - } - - if below_0x7f_only && accum_int >= 0x80 { - self.err_span_(start_bpos, - self.pos, - "this form of character escape may only be used with characters in \ - the range [\\x00-\\x7f]"); - valid = false; - } - - match char::from_u32(accum_int) { - Some(_) => valid, - None => { - let last_bpos = self.pos; - self.err_span_(start_bpos, last_bpos, "invalid numeric character escape"); - false - } - } - } - - /// Scan for a single (possibly escaped) byte or char - /// in a byte, (non-raw) byte string, char, or (non-raw) string literal. - /// `start` is the position of `first_source_char`, which is already consumed. - /// - /// Returns `true` if there was a valid char/byte. - fn scan_char_or_byte(&mut self, - start: BytePos, - first_source_char: char, - ascii_only: bool, - delim: char) - -> bool - { - match first_source_char { - '\\' => { - // '\X' for some X must be a character constant: - let escaped = self.ch; - let escaped_pos = self.pos; - self.bump(); - match escaped { - None => {} // EOF here is an error that will be checked later. - Some(e) => { - return match e { - 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true, - 'x' => self.scan_byte_escape(delim, !ascii_only), - 'u' => { - let valid = if self.ch_is('{') { - self.scan_unicode_escape(delim) && !ascii_only - } else { - let span = self.mk_sp(start, self.pos); - let mut suggestion = "\\u{".to_owned(); - let mut err = self.sess.span_diagnostic.struct_span_err( - span, - "incorrect unicode escape sequence", - ); - let mut i = 0; - while let (Some(ch), true) = (self.ch, i < 6) { - if ch.is_digit(16) { - suggestion.push(ch); - self.bump(); - i += 1; - } else { - break; - } - } - if i != 0 { - suggestion.push('}'); - err.span_suggestion( - self.mk_sp(start, self.pos), - "format of unicode escape sequences uses braces", - suggestion, - Applicability::MaybeIncorrect, - ); - } else { - err.span_help( - span, - "format of unicode escape sequences is `\\u{...}`", - ); - } - err.emit(); - false - }; - if ascii_only { - self.err_span_(start, - self.pos, - "unicode escape sequences cannot be used as a \ - byte or in a byte string"); - } - valid - - } - '\n' if delim == '"' => { - self.consume_whitespace(); - true - } - '\r' if delim == '"' && self.ch_is('\n') => { - self.consume_whitespace(); - true - } - c => { - let pos = self.pos; - let mut err = self.struct_err_span_char(escaped_pos, - pos, - if ascii_only { - "unknown byte escape" - } else { - "unknown character \ - escape" - }, - c); - if e == '\r' { - err.span_help(self.mk_sp(escaped_pos, pos), - "this is an isolated carriage return; consider \ - checking your editor and version control \ - settings"); - } - if (e == '{' || e == '}') && !ascii_only { - err.span_help(self.mk_sp(escaped_pos, pos), - "if used in a formatting string, curly braces \ - are escaped with `{{` and `}}`"); - } - err.emit(); - false - } - } - } - } - } - '\t' | '\n' | '\r' | '\'' if delim == '\'' => { - let pos = self.pos; - self.err_span_char(start, - pos, - if ascii_only { - "byte constant must be escaped" - } else { - "character constant must be escaped" - }, - first_source_char); - return false; + fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_raw_byte_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::ByteStr, + range, + err, + ) } - '\r' => { - if self.ch_is('\n') { - self.bump(); - return true; - } else { - self.err_span_(start, - self.pos, - "bare CR not allowed in string, use \\r instead"); - return false; - } - } - _ => { - if ascii_only && first_source_char > '\x7F' { - let pos = self.pos; - self.err_span_(start, - pos, - "byte constant must be ASCII. Use a \\xHH escape for a \ - non-ASCII byte"); - return false; - } - } - } - true - } - - /// Scan over a `\u{...}` escape - /// - /// At this point, we have already seen the `\` and the `u`, the `{` is the current character. - /// We will read a hex number (with `_` separators), with 1 to 6 actual digits, - /// and pass over the `}`. - fn scan_unicode_escape(&mut self, delim: char) -> bool { - self.bump(); // past the { - let start_bpos = self.pos; - let mut valid = true; - - if let Some('_') = self.ch { - // disallow leading `_` - self.err_span_(self.pos, - self.next_pos, - "invalid start of unicode escape"); - valid = false; - } - - let count = self.scan_digits(16, 16); - - if count > 6 { - self.err_span_(start_bpos, - self.pos, - "overlong unicode escape (must have at most 6 hex digits)"); - valid = false; - } - - loop { - match self.ch { - Some('}') => { - if valid && count == 0 { - self.err_span_(start_bpos, - self.pos, - "empty unicode escape (must have at least 1 hex digit)"); - valid = false; - } - self.bump(); // past the ending `}` - break; - }, - Some(c) => { - if c == delim { - self.err_span_(self.pos, - self.pos, - "unterminated unicode escape (needed a `}`)"); - valid = false; - break; - } else if valid { - self.err_span_char(start_bpos, - self.pos, - "invalid character in unicode escape", - c); - valid = false; - } - }, - None => { - self.fatal_span_(start_bpos, - self.pos, - "unterminated unicode escape (found EOF)").raise(); - } - } - self.bump(); - } - - valid - } - - /// Scan over a float exponent. - fn scan_float_exponent(&mut self) { - if self.ch_is('e') || self.ch_is('E') { - self.bump(); - - if self.ch_is('-') || self.ch_is('+') { - self.bump(); - } - - if self.scan_digits(10, 10) == 0 { - let mut err = self.struct_span_fatal( - self.pos, self.next_pos, - "expected at least one digit in exponent" - ); - if let Some(ch) = self.ch { - // check for e.g., Unicode minus '−' (Issue #49746) - if unicode_chars::check_for_substitution(self, ch, &mut err) { - self.bump(); - self.scan_digits(10, 10); - } - } - err.emit(); - } - } - } - - /// Checks that a base is valid for a floating literal, emitting a nice - /// error if it isn't. - fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) { - match base { - 16 => { - self.err_span_(start_bpos, - last_bpos, - "hexadecimal float literal is not supported") - } - 8 => { - self.err_span_(start_bpos, - last_bpos, - "octal float literal is not supported") - } - 2 => { - self.err_span_(start_bpos, - last_bpos, - "binary float literal is not supported") - } - _ => (), - } - } - - fn binop(&mut self, op: token::BinOpToken) -> token::Token { - self.bump(); - if self.ch_is('=') { - self.bump(); - token::BinOpEq(op) - } else { - token::BinOp(op) - } + }) } - /// Returns the next token from the string, advances the input past that - /// token, and updates the interner - fn next_token_inner(&mut self) -> Result<token::Token, ()> { - let c = self.ch; - - if ident_start(c) { - let (is_ident_start, is_raw_ident) = - match (c.unwrap(), self.nextch(), self.nextnextch()) { - // r# followed by an identifier starter is a raw identifier. - // This is an exception to the r# case below. - ('r', Some('#'), x) if ident_start(x) => (true, true), - // r as in r" or r#" is part of a raw string literal. - // b as in b' is part of a byte literal. - // They are not identifiers, and are handled further down. - ('r', Some('"'), _) | - ('r', Some('#'), _) | - ('b', Some('"'), _) | - ('b', Some('\''), _) | - ('b', Some('r'), Some('"')) | - ('b', Some('r'), Some('#')) => (false, false), - _ => (true, false), - }; - - if is_ident_start { - let raw_start = self.pos; - if is_raw_ident { - // Consume the 'r#' characters. - self.bump(); - self.bump(); - } - - let start = self.pos; - self.bump(); - - while ident_continue(self.ch) { - self.bump(); - } - - return Ok(self.with_str_from(start, |string| { - // FIXME: perform NFKC normalization here. (Issue #2253) - let ident = self.mk_ident(string); - - if is_raw_ident && (ident.is_path_segment_keyword() || - ident.name == keywords::Underscore.name()) { - self.fatal_span_(raw_start, self.pos, - &format!("`r#{}` is not currently supported.", ident.name) - ).raise(); - } - - if is_raw_ident { - let span = self.mk_sp(raw_start, self.pos); - self.sess.raw_identifier_spans.borrow_mut().push(span); - } - - token::Ident(ident, is_raw_ident) - })); - } - } - - if is_dec_digit(c) { - let num = self.scan_number(c.unwrap()); - let suffix = self.scan_optional_raw_name(); - debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix); - return Ok(token::Literal(num, suffix)); - } - - match c.expect("next_token_inner called at EOF") { - // One-byte tokens. - ';' => { - self.bump(); - Ok(token::Semi) - } - ',' => { - self.bump(); - Ok(token::Comma) - } - '.' => { - self.bump(); - if self.ch_is('.') { - self.bump(); - if self.ch_is('.') { - self.bump(); - Ok(token::DotDotDot) - } else if self.ch_is('=') { - self.bump(); - Ok(token::DotDotEq) - } else { - Ok(token::DotDot) - } - } else { - Ok(token::Dot) - } - } - '(' => { - self.bump(); - Ok(token::OpenDelim(token::Paren)) - } - ')' => { - self.bump(); - Ok(token::CloseDelim(token::Paren)) - } - '{' => { - self.bump(); - Ok(token::OpenDelim(token::Brace)) - } - '}' => { - self.bump(); - Ok(token::CloseDelim(token::Brace)) - } - '[' => { - self.bump(); - Ok(token::OpenDelim(token::Bracket)) - } - ']' => { - self.bump(); - Ok(token::CloseDelim(token::Bracket)) - } - '@' => { - self.bump(); - Ok(token::At) - } - '#' => { - self.bump(); - Ok(token::Pound) - } - '~' => { - self.bump(); - Ok(token::Tilde) - } - '?' => { - self.bump(); - Ok(token::Question) - } - ':' => { - self.bump(); - if self.ch_is(':') { - self.bump(); - Ok(token::ModSep) - } else { - Ok(token::Colon) - } - } - - '$' => { - self.bump(); - Ok(token::Dollar) - } - - // Multi-byte tokens. - '=' => { - self.bump(); - if self.ch_is('=') { - self.bump(); - Ok(token::EqEq) - } else if self.ch_is('>') { - self.bump(); - Ok(token::FatArrow) - } else { - Ok(token::Eq) - } - } - '!' => { - self.bump(); - if self.ch_is('=') { - self.bump(); - Ok(token::Ne) - } else { - Ok(token::Not) - } - } - '<' => { - self.bump(); - match self.ch.unwrap_or('\x00') { - '=' => { - self.bump(); - Ok(token::Le) - } - '<' => { - Ok(self.binop(token::Shl)) - } - '-' => { - self.bump(); - Ok(token::LArrow) - } - _ => { - Ok(token::Lt) - } - } - } - '>' => { - self.bump(); - match self.ch.unwrap_or('\x00') { - '=' => { - self.bump(); - Ok(token::Ge) - } - '>' => { - Ok(self.binop(token::Shr)) - } - _ => { - Ok(token::Gt) - } - } - } - '\'' => { - // Either a character constant 'a' OR a lifetime name 'abc - let start_with_quote = self.pos; - self.bump(); - let start = self.pos; - - // the eof will be picked up by the final `'` check below - let c2 = self.ch.unwrap_or('\x00'); - self.bump(); - - // If the character is an ident start not followed by another single - // quote, then this is a lifetime name: - if (ident_start(Some(c2)) || c2.is_numeric()) && !self.ch_is('\'') { - while ident_continue(self.ch) { - self.bump(); - } - // lifetimes shouldn't end with a single quote - // if we find one, then this is an invalid character literal - if self.ch_is('\'') { - self.err_span_( - start_with_quote, - self.next_pos, - "character literal may only contain one codepoint"); - self.bump(); - return Ok(token::Literal(token::Err(Symbol::intern("??")), None)) - - } - - // Include the leading `'` in the real identifier, for macro - // expansion purposes. See #12512 for the gory details of why - // this is necessary. - let ident = self.with_str_from(start, |lifetime_name| { - self.mk_ident(&format!("'{}", lifetime_name)) - }); - - if c2.is_numeric() { - // this is a recovered lifetime written `'1`, error but accept it - self.err_span_( - start_with_quote, - self.pos, - "lifetimes cannot start with a number", - ); - } - - return Ok(token::Lifetime(ident)); - } - - let valid = self.scan_char_or_byte(start, c2, /* ascii_only */ false, '\''); - - if !self.ch_is('\'') { - let pos = self.pos; - - loop { - self.bump(); - if self.ch_is('\'') { - let start = self.src_index(start); - let end = self.src_index(self.pos); - self.bump(); - let span = self.mk_sp(start_with_quote, self.pos); - self.sess.span_diagnostic - .struct_span_err(span, - "character literal may only contain one codepoint") - .span_suggestion( - span, - "if you meant to write a `str` literal, use double quotes", - format!("\"{}\"", &self.src[start..end]), - Applicability::MachineApplicable - ).emit(); - return Ok(token::Literal(token::Err(Symbol::intern("??")), None)) - } - if self.ch_is('\n') || self.is_eof() || self.ch_is('/') { - // Only attempt to infer single line string literals. If we encounter - // a slash, bail out in order to avoid nonsensical suggestion when - // involving comments. - break; - } - } - - self.fatal_span_verbose(start_with_quote, pos, - String::from("character literal may only contain one codepoint")).raise(); - } - - let id = if valid { - self.name_from(start) - } else { - Symbol::intern("0") - }; - - self.bump(); // advance ch past token - let suffix = self.scan_optional_raw_name(); - - Ok(token::Literal(token::Char(id), suffix)) - } - 'b' => { - self.bump(); - let lit = match self.ch { - Some('\'') => self.scan_byte(), - Some('"') => self.scan_byte_string(), - Some('r') => self.scan_raw_byte_string(), - _ => unreachable!(), // Should have been a token::Ident above. - }; - let suffix = self.scan_optional_raw_name(); - - Ok(token::Literal(lit, suffix)) - } - '"' => { - let start_bpos = self.pos; - let mut valid = true; - self.bump(); - - while !self.ch_is('"') { - if self.is_eof() { - let last_bpos = self.pos; - self.fatal_span_(start_bpos, - last_bpos, - "unterminated double quote string").raise(); - } - - let ch_start = self.pos; - let ch = self.ch.unwrap(); - self.bump(); - valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only */ false, '"'); - } - // adjust for the ASCII " at the start of the literal - let id = if valid { - self.name_from(start_bpos + BytePos(1)) - } else { - Symbol::intern("??") - }; - self.bump(); - let suffix = self.scan_optional_raw_name(); - - Ok(token::Literal(token::Str_(id), suffix)) - } - 'r' => { - let start_bpos = self.pos; - self.bump(); - let mut hash_count: u16 = 0; - while self.ch_is('#') { - if hash_count == 65535 { - let bpos = self.next_pos; - self.fatal_span_(start_bpos, - bpos, - "too many `#` symbols: raw strings may be \ - delimited by up to 65535 `#` symbols").raise(); - } - self.bump(); - hash_count += 1; - } - - if self.is_eof() { - self.fail_unterminated_raw_string(start_bpos, hash_count); - } else if !self.ch_is('"') { - let last_bpos = self.pos; - let curr_char = self.ch.unwrap(); - self.fatal_span_char(start_bpos, - last_bpos, - "found invalid character; only `#` is allowed \ - in raw string delimitation", - curr_char).raise(); - } - self.bump(); - let content_start_bpos = self.pos; - let mut content_end_bpos; - let mut valid = true; - 'outer: loop { - if self.is_eof() { - self.fail_unterminated_raw_string(start_bpos, hash_count); - } - // if self.ch_is('"') { - // content_end_bpos = self.pos; - // for _ in 0..hash_count { - // self.bump(); - // if !self.ch_is('#') { - // continue 'outer; - let c = self.ch.unwrap(); - match c { - '"' => { - content_end_bpos = self.pos; - for _ in 0..hash_count { - self.bump(); - if !self.ch_is('#') { - continue 'outer; - } - } - break; - } - '\r' => { - if !self.nextch_is('\n') { - let last_bpos = self.pos; - self.err_span_(start_bpos, - last_bpos, - "bare CR not allowed in raw string, use \\r \ - instead"); - valid = false; - } - } - _ => (), - } - self.bump(); - } - - self.bump(); - let id = if valid { - self.name_from_to(content_start_bpos, content_end_bpos) - } else { - Symbol::intern("??") - }; - let suffix = self.scan_optional_raw_name(); - - Ok(token::Literal(token::StrRaw(id, hash_count), suffix)) - } - '-' => { - if self.nextch_is('>') { - self.bump(); - self.bump(); - Ok(token::RArrow) - } else { - Ok(self.binop(token::Minus)) - } - } - '&' => { - if self.nextch_is('&') { - self.bump(); - self.bump(); - Ok(token::AndAnd) - } else { - Ok(self.binop(token::And)) - } - } - '|' => { - match self.nextch() { - Some('|') => { - self.bump(); - self.bump(); - Ok(token::OrOr) - } - _ => { - Ok(self.binop(token::Or)) - } - } - } - '+' => { - Ok(self.binop(token::Plus)) - } - '*' => { - Ok(self.binop(token::Star)) - } - '/' => { - Ok(self.binop(token::Slash)) - } - '^' => { - Ok(self.binop(token::Caret)) - } - '%' => { - Ok(self.binop(token::Percent)) - } - c => { - let last_bpos = self.pos; - let bpos = self.next_pos; - let mut err = self.struct_fatal_span_char(last_bpos, - bpos, - "unknown start of token", - c); - unicode_chars::check_for_substitution(self, c, &mut err); - self.fatal_errs.push(err); - - Err(()) + fn validate_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) { + let lit = self.str_from_to(content_start, content_end); + unescape::unescape_byte_str(lit, &mut |range, c| { + if let Err(err) = c { + emit_unescape_error( + &self.sess.span_diagnostic, + lit, + self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)), + unescape::Mode::ByteStr, + range, + err, + ) } - } - } - - fn consume_whitespace(&mut self) { - while is_pattern_whitespace(self.ch) && !self.is_eof() { - self.bump(); - } - } - - fn read_to_eol(&mut self) -> String { - let mut val = String::new(); - while !self.ch_is('\n') && !self.is_eof() { - val.push(self.ch.unwrap()); - self.bump(); - } - - if self.ch_is('\n') { - self.bump(); - } - - val - } - - fn read_one_line_comment(&mut self) -> String { - let val = self.read_to_eol(); - assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') || - (val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!')); - val - } - - fn consume_non_eol_whitespace(&mut self) { - while is_pattern_whitespace(self.ch) && !self.ch_is('\n') && !self.is_eof() { - self.bump(); - } - } - - fn peeking_at_comment(&self) -> bool { - (self.ch_is('/') && self.nextch_is('/')) || (self.ch_is('/') && self.nextch_is('*')) || - // consider shebangs comments, but not inner attributes - (self.ch_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')) - } - - fn scan_byte(&mut self) -> token::Lit { - self.bump(); - let start = self.pos; - - // the eof will be picked up by the final `'` check below - let c2 = self.ch.unwrap_or('\x00'); - self.bump(); - - let valid = self.scan_char_or_byte(start, - c2, - // ascii_only = - true, - '\''); - if !self.ch_is('\'') { - // Byte offsetting here is okay because the - // character before position `start` are an - // ascii single quote and ascii 'b'. - let pos = self.pos; - self.fatal_span_verbose(start - BytePos(2), - pos, - "unterminated byte constant".to_string()).raise(); - } - - let id = if valid { - self.name_from(start) - } else { - Symbol::intern("?") - }; - self.bump(); // advance ch past token - - token::Byte(id) - } - - #[inline] - fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool { - self.scan_hex_digits(2, delim, below_0x7f_only) + }) } - fn scan_byte_string(&mut self) -> token::Lit { - self.bump(); - let start = self.pos; - let mut valid = true; - - while !self.ch_is('"') { - if self.is_eof() { - let pos = self.pos; - self.fatal_span_(start, pos, "unterminated double quote byte string").raise(); - } - - let ch_start = self.pos; - let ch = self.ch.unwrap(); - self.bump(); - valid &= self.scan_char_or_byte(ch_start, - ch, - // ascii_only = - true, - '"'); - } - - let id = if valid { - self.name_from(start) - } else { - Symbol::intern("??") + fn validate_int_literal(&self, base: Base, content_start: BytePos, content_end: BytePos) { + let base = match base { + Base::Binary => 2, + Base::Octal => 8, + _ => return, }; - self.bump(); - - token::ByteStr(id) - } + let s = self.str_from_to(content_start + BytePos(2), content_end); + for (idx, c) in s.char_indices() { + let idx = idx as u32; + if c != '_' && c.to_digit(base).is_none() { + let lo = content_start + BytePos(2 + idx); + let hi = content_start + BytePos(2 + idx + c.len_utf8() as u32); + self.err_span_(lo, hi, + &format!("invalid digit for a base {} literal", base)); - fn scan_raw_byte_string(&mut self) -> token::Lit { - let start_bpos = self.pos; - self.bump(); - let mut hash_count = 0; - while self.ch_is('#') { - if hash_count == 65535 { - let bpos = self.next_pos; - self.fatal_span_(start_bpos, - bpos, - "too many `#` symbols: raw byte strings may be \ - delimited by up to 65535 `#` symbols").raise(); } - self.bump(); - hash_count += 1; - } - - if self.is_eof() { - self.fail_unterminated_raw_string(start_bpos, hash_count); - } else if !self.ch_is('"') { - let pos = self.pos; - let ch = self.ch.unwrap(); - self.fatal_span_char(start_bpos, - pos, - "found invalid character; only `#` is allowed in raw \ - string delimitation", - ch).raise(); } - self.bump(); - let content_start_bpos = self.pos; - let mut content_end_bpos; - 'outer: loop { - match self.ch { - None => { - self.fail_unterminated_raw_string(start_bpos, hash_count); - } - Some('"') => { - content_end_bpos = self.pos; - for _ in 0..hash_count { - self.bump(); - if !self.ch_is('#') { - continue 'outer; - } - } - break; - } - Some(c) => { - if c > '\x7F' { - let pos = self.pos; - self.err_span_char(pos, pos, "raw byte string must be ASCII", c); - } - } - } - self.bump(); - } - - self.bump(); - - token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos), hash_count) } } -// This tests the character for the unicode property 'PATTERN_WHITE_SPACE' which -// is guaranteed to be forward compatible. http://unicode.org/reports/tr31/#R3 -#[inline] -crate fn is_pattern_whitespace(c: Option<char>) -> bool { - c.map_or(false, Pattern_White_Space) -} - -#[inline] -fn in_range(c: Option<char>, lo: char, hi: char) -> bool { - c.map_or(false, |c| lo <= c && c <= hi) -} - -#[inline] -fn is_dec_digit(c: Option<char>) -> bool { - in_range(c, '0', '9') -} - fn is_doc_comment(s: &str) -> bool { let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') || s.starts_with("//!"); @@ -1883,289 +733,3 @@ fn is_block_doc_comment(s: &str) -> bool { debug!("is {:?} a doc comment? {}", s, res); res } - -/// Determine whether `c` is a valid start for an ident. -fn ident_start(c: Option<char>) -> bool { - let c = match c { - Some(c) => c, - None => return false, - }; - - (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start()) -} - -fn ident_continue(c: Option<char>) -> bool { - let c = match c { - Some(c) => c, - None => return false, - }; - - (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || - (c > '\x7f' && c.is_xid_continue()) -} - -#[inline] -fn char_at(s: &str, byte: usize) -> char { - s[byte..].chars().next().unwrap() -} - -#[cfg(test)] -mod tests { - use super::*; - - use crate::ast::{Ident, CrateConfig}; - use crate::symbol::Symbol; - use crate::source_map::SourceMap; - use crate::feature_gate::UnstableFeatures; - use crate::parse::token; - use crate::diagnostics::plugin::ErrorMap; - use crate::with_globals; - use std::io; - use std::path::PathBuf; - use syntax_pos::{BytePos, Span, NO_EXPANSION}; - use rustc_data_structures::fx::FxHashSet; - use rustc_data_structures::sync::Lock; - - fn mk_sess(sm: Lrc<SourceMap>) -> ParseSess { - let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()), - Some(sm.clone()), - false, - false); - ParseSess { - span_diagnostic: errors::Handler::with_emitter(true, None, Box::new(emitter)), - unstable_features: UnstableFeatures::from_environment(), - config: CrateConfig::default(), - included_mod_stack: Lock::new(Vec::new()), - source_map: sm, - missing_fragment_specifiers: Lock::new(FxHashSet::default()), - raw_identifier_spans: Lock::new(Vec::new()), - registered_diagnostics: Lock::new(ErrorMap::new()), - buffered_lints: Lock::new(vec![]), - } - } - - // open a string reader for the given string - fn setup<'a>(sm: &SourceMap, - sess: &'a ParseSess, - teststr: String) - -> StringReader<'a> { - let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr); - StringReader::new(sess, sf, None) - } - - #[test] - fn t1() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - let mut string_reader = setup(&sm, - &sh, - "/* my source file */ fn main() { println!(\"zebra\"); }\n" - .to_string()); - let id = Ident::from_str("fn"); - assert_eq!(string_reader.next_token().tok, token::Comment); - assert_eq!(string_reader.next_token().tok, token::Whitespace); - let tok1 = string_reader.next_token(); - let tok2 = TokenAndSpan { - tok: token::Ident(id, false), - sp: Span::new(BytePos(21), BytePos(23), NO_EXPANSION), - }; - assert_eq!(tok1.tok, tok2.tok); - assert_eq!(tok1.sp, tok2.sp); - assert_eq!(string_reader.next_token().tok, token::Whitespace); - // the 'main' id is already read: - assert_eq!(string_reader.pos.clone(), BytePos(28)); - // read another token: - let tok3 = string_reader.next_token(); - let tok4 = TokenAndSpan { - tok: mk_ident("main"), - sp: Span::new(BytePos(24), BytePos(28), NO_EXPANSION), - }; - assert_eq!(tok3.tok, tok4.tok); - assert_eq!(tok3.sp, tok4.sp); - // the lparen is already read: - assert_eq!(string_reader.pos.clone(), BytePos(29)) - }) - } - - // check that the given reader produces the desired stream - // of tokens (stop checking after exhausting the expected vec) - fn check_tokenization(mut string_reader: StringReader<'_>, expected: Vec<token::Token>) { - for expected_tok in &expected { - assert_eq!(&string_reader.next_token().tok, expected_tok); - } - } - - // make the identifier by looking up the string in the interner - fn mk_ident(id: &str) -> token::Token { - token::Token::from_ast_ident(Ident::from_str(id)) - } - - #[test] - fn doublecolonparsing() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - check_tokenization(setup(&sm, &sh, "a b".to_string()), - vec![mk_ident("a"), token::Whitespace, mk_ident("b")]); - }) - } - - #[test] - fn dcparsing_2() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - check_tokenization(setup(&sm, &sh, "a::b".to_string()), - vec![mk_ident("a"), token::ModSep, mk_ident("b")]); - }) - } - - #[test] - fn dcparsing_3() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - check_tokenization(setup(&sm, &sh, "a ::b".to_string()), - vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]); - }) - } - - #[test] - fn dcparsing_4() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - check_tokenization(setup(&sm, &sh, "a:: b".to_string()), - vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]); - }) - } - - #[test] - fn character_a() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - assert_eq!(setup(&sm, &sh, "'a'".to_string()).next_token().tok, - token::Literal(token::Char(Symbol::intern("a")), None)); - }) - } - - #[test] - fn character_space() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - assert_eq!(setup(&sm, &sh, "' '".to_string()).next_token().tok, - token::Literal(token::Char(Symbol::intern(" ")), None)); - }) - } - - #[test] - fn character_escaped() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - assert_eq!(setup(&sm, &sh, "'\\n'".to_string()).next_token().tok, - token::Literal(token::Char(Symbol::intern("\\n")), None)); - }) - } - - #[test] - fn lifetime_name() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - assert_eq!(setup(&sm, &sh, "'abc".to_string()).next_token().tok, - token::Lifetime(Ident::from_str("'abc"))); - }) - } - - #[test] - fn raw_string() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - assert_eq!(setup(&sm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()) - .next_token() - .tok, - token::Literal(token::StrRaw(Symbol::intern("\"#a\\b\x00c\""), 3), None)); - }) - } - - #[test] - fn literal_suffixes() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - macro_rules! test { - ($input: expr, $tok_type: ident, $tok_contents: expr) => {{ - assert_eq!(setup(&sm, &sh, format!("{}suffix", $input)).next_token().tok, - token::Literal(token::$tok_type(Symbol::intern($tok_contents)), - Some(Symbol::intern("suffix")))); - // with a whitespace separator: - assert_eq!(setup(&sm, &sh, format!("{} suffix", $input)).next_token().tok, - token::Literal(token::$tok_type(Symbol::intern($tok_contents)), - None)); - }} - } - - test!("'a'", Char, "a"); - test!("b'a'", Byte, "a"); - test!("\"a\"", Str_, "a"); - test!("b\"a\"", ByteStr, "a"); - test!("1234", Integer, "1234"); - test!("0b101", Integer, "0b101"); - test!("0xABC", Integer, "0xABC"); - test!("1.0", Float, "1.0"); - test!("1.0e10", Float, "1.0e10"); - - assert_eq!(setup(&sm, &sh, "2us".to_string()).next_token().tok, - token::Literal(token::Integer(Symbol::intern("2")), - Some(Symbol::intern("us")))); - assert_eq!(setup(&sm, &sh, "r###\"raw\"###suffix".to_string()).next_token().tok, - token::Literal(token::StrRaw(Symbol::intern("raw"), 3), - Some(Symbol::intern("suffix")))); - assert_eq!(setup(&sm, &sh, "br###\"raw\"###suffix".to_string()).next_token().tok, - token::Literal(token::ByteStrRaw(Symbol::intern("raw"), 3), - Some(Symbol::intern("suffix")))); - }) - } - - #[test] - fn line_doc_comments() { - assert!(is_doc_comment("///")); - assert!(is_doc_comment("/// blah")); - assert!(!is_doc_comment("////")); - } - - #[test] - fn nested_block_comments() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - let mut lexer = setup(&sm, &sh, "/* /* */ */'a'".to_string()); - match lexer.next_token().tok { - token::Comment => {} - _ => panic!("expected a comment!"), - } - assert_eq!(lexer.next_token().tok, - token::Literal(token::Char(Symbol::intern("a")), None)); - }) - } - - #[test] - fn crlf_comments() { - with_globals(|| { - let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); - let sh = mk_sess(sm.clone()); - let mut lexer = setup(&sm, &sh, "// test\r\n/// test\r\n".to_string()); - let comment = lexer.next_token(); - assert_eq!(comment.tok, token::Comment); - assert_eq!((comment.sp.lo(), comment.sp.hi()), (BytePos(0), BytePos(7))); - assert_eq!(lexer.next_token().tok, token::Whitespace); - assert_eq!(lexer.next_token().tok, - token::DocComment(Symbol::intern("/// test"))); - }) - } -} diff --git a/src/libsyntax/parse/lexer/tests.rs b/src/libsyntax/parse/lexer/tests.rs new file mode 100644 index 00000000000..fc47e4f0b18 --- /dev/null +++ b/src/libsyntax/parse/lexer/tests.rs @@ -0,0 +1,255 @@ +use super::*; + +use crate::ast::CrateConfig; +use crate::symbol::Symbol; +use crate::source_map::{SourceMap, FilePathMapping}; +use crate::feature_gate::UnstableFeatures; +use crate::parse::token; +use crate::diagnostics::plugin::ErrorMap; +use crate::with_default_globals; +use std::io; +use std::path::PathBuf; +use syntax_pos::{BytePos, Span, NO_EXPANSION, edition::Edition}; +use rustc_data_structures::fx::{FxHashSet, FxHashMap}; +use rustc_data_structures::sync::{Lock, Once}; + +fn mk_sess(sm: Lrc<SourceMap>) -> ParseSess { + let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()), + Some(sm.clone()), + false, + false, + false); + ParseSess { + span_diagnostic: errors::Handler::with_emitter(true, None, Box::new(emitter)), + unstable_features: UnstableFeatures::from_environment(), + config: CrateConfig::default(), + included_mod_stack: Lock::new(Vec::new()), + source_map: sm, + missing_fragment_specifiers: Lock::new(FxHashSet::default()), + raw_identifier_spans: Lock::new(Vec::new()), + registered_diagnostics: Lock::new(ErrorMap::new()), + buffered_lints: Lock::new(vec![]), + edition: Edition::from_session(), + ambiguous_block_expr_parse: Lock::new(FxHashMap::default()), + param_attr_spans: Lock::new(Vec::new()), + let_chains_spans: Lock::new(Vec::new()), + async_closure_spans: Lock::new(Vec::new()), + injected_crate_name: Once::new(), + } +} + +// open a string reader for the given string +fn setup<'a>(sm: &SourceMap, + sess: &'a ParseSess, + teststr: String) + -> StringReader<'a> { + let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr); + StringReader::new(sess, sf, None) +} + +#[test] +fn t1() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + let mut string_reader = setup(&sm, + &sh, + "/* my source file */ fn main() { println!(\"zebra\"); }\n" + .to_string()); + assert_eq!(string_reader.next_token(), token::Comment); + assert_eq!(string_reader.next_token(), token::Whitespace); + let tok1 = string_reader.next_token(); + let tok2 = Token::new( + mk_ident("fn"), + Span::new(BytePos(21), BytePos(23), NO_EXPANSION), + ); + assert_eq!(tok1.kind, tok2.kind); + assert_eq!(tok1.span, tok2.span); + assert_eq!(string_reader.next_token(), token::Whitespace); + // read another token: + let tok3 = string_reader.next_token(); + assert_eq!(string_reader.pos.clone(), BytePos(28)); + let tok4 = Token::new( + mk_ident("main"), + Span::new(BytePos(24), BytePos(28), NO_EXPANSION), + ); + assert_eq!(tok3.kind, tok4.kind); + assert_eq!(tok3.span, tok4.span); + + assert_eq!(string_reader.next_token(), token::OpenDelim(token::Paren)); + assert_eq!(string_reader.pos.clone(), BytePos(29)) + }) +} + +// check that the given reader produces the desired stream +// of tokens (stop checking after exhausting the expected vec) +fn check_tokenization(mut string_reader: StringReader<'_>, expected: Vec<TokenKind>) { + for expected_tok in &expected { + assert_eq!(&string_reader.next_token(), expected_tok); + } +} + +// make the identifier by looking up the string in the interner +fn mk_ident(id: &str) -> TokenKind { + token::Ident(Symbol::intern(id), false) +} + +fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind { + TokenKind::lit(kind, Symbol::intern(symbol), suffix.map(Symbol::intern)) +} + +#[test] +fn doublecolonparsing() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + check_tokenization(setup(&sm, &sh, "a b".to_string()), + vec![mk_ident("a"), token::Whitespace, mk_ident("b")]); + }) +} + +#[test] +fn dcparsing_2() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + check_tokenization(setup(&sm, &sh, "a::b".to_string()), + vec![mk_ident("a"), token::ModSep, mk_ident("b")]); + }) +} + +#[test] +fn dcparsing_3() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + check_tokenization(setup(&sm, &sh, "a ::b".to_string()), + vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]); + }) +} + +#[test] +fn dcparsing_4() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + check_tokenization(setup(&sm, &sh, "a:: b".to_string()), + vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]); + }) +} + +#[test] +fn character_a() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + assert_eq!(setup(&sm, &sh, "'a'".to_string()).next_token(), + mk_lit(token::Char, "a", None)); + }) +} + +#[test] +fn character_space() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + assert_eq!(setup(&sm, &sh, "' '".to_string()).next_token(), + mk_lit(token::Char, " ", None)); + }) +} + +#[test] +fn character_escaped() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + assert_eq!(setup(&sm, &sh, "'\\n'".to_string()).next_token(), + mk_lit(token::Char, "\\n", None)); + }) +} + +#[test] +fn lifetime_name() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + assert_eq!(setup(&sm, &sh, "'abc".to_string()).next_token(), + token::Lifetime(Symbol::intern("'abc"))); + }) +} + +#[test] +fn raw_string() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + assert_eq!(setup(&sm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token(), + mk_lit(token::StrRaw(3), "\"#a\\b\x00c\"", None)); + }) +} + +#[test] +fn literal_suffixes() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + macro_rules! test { + ($input: expr, $tok_type: ident, $tok_contents: expr) => {{ + assert_eq!(setup(&sm, &sh, format!("{}suffix", $input)).next_token(), + mk_lit(token::$tok_type, $tok_contents, Some("suffix"))); + // with a whitespace separator: + assert_eq!(setup(&sm, &sh, format!("{} suffix", $input)).next_token(), + mk_lit(token::$tok_type, $tok_contents, None)); + }} + } + + test!("'a'", Char, "a"); + test!("b'a'", Byte, "a"); + test!("\"a\"", Str, "a"); + test!("b\"a\"", ByteStr, "a"); + test!("1234", Integer, "1234"); + test!("0b101", Integer, "0b101"); + test!("0xABC", Integer, "0xABC"); + test!("1.0", Float, "1.0"); + test!("1.0e10", Float, "1.0e10"); + + assert_eq!(setup(&sm, &sh, "2us".to_string()).next_token(), + mk_lit(token::Integer, "2", Some("us"))); + assert_eq!(setup(&sm, &sh, "r###\"raw\"###suffix".to_string()).next_token(), + mk_lit(token::StrRaw(3), "raw", Some("suffix"))); + assert_eq!(setup(&sm, &sh, "br###\"raw\"###suffix".to_string()).next_token(), + mk_lit(token::ByteStrRaw(3), "raw", Some("suffix"))); + }) +} + +#[test] +fn line_doc_comments() { + assert!(is_doc_comment("///")); + assert!(is_doc_comment("/// blah")); + assert!(!is_doc_comment("////")); +} + +#[test] +fn nested_block_comments() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + let mut lexer = setup(&sm, &sh, "/* /* */ */'a'".to_string()); + assert_eq!(lexer.next_token(), token::Comment); + assert_eq!(lexer.next_token(), mk_lit(token::Char, "a", None)); + }) +} + +#[test] +fn crlf_comments() { + with_default_globals(|| { + let sm = Lrc::new(SourceMap::new(FilePathMapping::empty())); + let sh = mk_sess(sm.clone()); + let mut lexer = setup(&sm, &sh, "// test\r\n/// test\r\n".to_string()); + let comment = lexer.next_token(); + assert_eq!(comment.kind, token::Comment); + assert_eq!((comment.span.lo(), comment.span.hi()), (BytePos(0), BytePos(7))); + assert_eq!(lexer.next_token(), token::Whitespace); + assert_eq!(lexer.next_token(), token::DocComment(Symbol::intern("/// test"))); + }) +} diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs index 0db36c84cdf..37e67a2729e 100644 --- a/src/libsyntax/parse/lexer/tokentrees.rs +++ b/src/libsyntax/parse/lexer/tokentrees.rs @@ -1,13 +1,47 @@ +use syntax_pos::Span; + use crate::print::pprust::token_to_string; use crate::parse::lexer::{StringReader, UnmatchedBrace}; -use crate::parse::{token, PResult}; -use crate::tokenstream::{DelimSpan, IsJoint::*, TokenStream, TokenTree, TreeAndJoint}; +use crate::parse::token::{self, Token}; +use crate::parse::PResult; +use crate::tokenstream::{DelimSpan, IsJoint::{self, *}, TokenStream, TokenTree, TreeAndJoint}; impl<'a> StringReader<'a> { + crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) { + let mut tt_reader = TokenTreesReader { + string_reader: self, + token: Token::dummy(), + joint_to_prev: Joint, + open_braces: Vec::new(), + unmatched_braces: Vec::new(), + matching_delim_spans: Vec::new(), + last_unclosed_found_span: None, + }; + let res = tt_reader.parse_all_token_trees(); + (res, tt_reader.unmatched_braces) + } +} + +struct TokenTreesReader<'a> { + string_reader: StringReader<'a>, + token: Token, + joint_to_prev: IsJoint, + /// Stack of open delimiters and their spans. Used for error message. + open_braces: Vec<(token::DelimToken, Span)>, + unmatched_braces: Vec<UnmatchedBrace>, + /// The type and spans for all braces + /// + /// Used only for error recovery when arriving to EOF with mismatched braces. + matching_delim_spans: Vec<(token::DelimToken, Span, Span)>, + last_unclosed_found_span: Option<Span>, +} + +impl<'a> TokenTreesReader<'a> { // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`. - crate fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> { + fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> { let mut tts = Vec::new(); + self.real_token(); while self.token != token::Eof { tts.push(self.parse_token_tree()?); } @@ -19,7 +53,7 @@ impl<'a> StringReader<'a> { fn parse_token_trees_until_close_delim(&mut self) -> TokenStream { let mut tts = vec![]; loop { - if let token::CloseDelim(..) = self.token { + if let token::CloseDelim(..) = self.token.kind { return TokenStream::new(tts); } @@ -34,11 +68,12 @@ impl<'a> StringReader<'a> { } fn parse_token_tree(&mut self) -> PResult<'a, TreeAndJoint> { - let sm = self.sess.source_map(); - match self.token { + let sm = self.string_reader.sess.source_map(); + match self.token.kind { token::Eof => { let msg = "this file contains an un-closed delimiter"; - let mut err = self.sess.span_diagnostic.struct_span_err(self.span, msg); + let mut err = self.string_reader.sess.span_diagnostic + .struct_span_err(self.token.span, msg); for &(_, sp) in &self.open_braces { err.span_label(sp, "un-closed delimiter"); } @@ -46,13 +81,12 @@ impl<'a> StringReader<'a> { if let Some((delim, _)) = self.open_braces.last() { if let Some((_, open_sp, close_sp)) = self.matching_delim_spans.iter() .filter(|(d, open_sp, close_sp)| { - - if let Some(close_padding) = sm.span_to_margin(*close_sp) { - if let Some(open_padding) = sm.span_to_margin(*open_sp) { - return delim == d && close_padding != open_padding; + if let Some(close_padding) = sm.span_to_margin(*close_sp) { + if let Some(open_padding) = sm.span_to_margin(*open_sp) { + return delim == d && close_padding != open_padding; + } } - } - false + false }).next() // these are in reverse order as they get inserted on close, but { // we want the last open/first close err.span_label( @@ -69,10 +103,10 @@ impl<'a> StringReader<'a> { }, token::OpenDelim(delim) => { // The span for beginning of the delimited section - let pre_span = self.span; + let pre_span = self.token.span; // Parse the open delimiter. - self.open_braces.push((delim, self.span)); + self.open_braces.push((delim, self.token.span)); self.real_token(); // Parse the token trees within the delimiters. @@ -81,9 +115,9 @@ impl<'a> StringReader<'a> { let tts = self.parse_token_trees_until_close_delim(); // Expand to cover the entire delimited token tree - let delim_span = DelimSpan::from_pair(pre_span, self.span); + let delim_span = DelimSpan::from_pair(pre_span, self.token.span); - match self.token { + match self.token.kind { // Correct delimiter. token::CloseDelim(d) if d == delim => { let (open_brace, open_brace_span) = self.open_braces.pop().unwrap(); @@ -93,7 +127,7 @@ impl<'a> StringReader<'a> { self.matching_delim_spans.clear(); } else { self.matching_delim_spans.push( - (open_brace, open_brace_span, self.span), + (open_brace, open_brace_span, self.token.span), ); } // Parse the close delimiter. @@ -103,16 +137,16 @@ impl<'a> StringReader<'a> { token::CloseDelim(other) => { let mut unclosed_delimiter = None; let mut candidate = None; - if self.last_unclosed_found_span != Some(self.span) { + if self.last_unclosed_found_span != Some(self.token.span) { // do not complain about the same unclosed delimiter multiple times - self.last_unclosed_found_span = Some(self.span); + self.last_unclosed_found_span = Some(self.token.span); // This is a conservative error: only report the last unclosed // delimiter. The previous unclosed delimiters could actually be // closed! The parser just hasn't gotten to them yet. if let Some(&(_, sp)) = self.open_braces.last() { unclosed_delimiter = Some(sp); }; - if let Some(current_padding) = sm.span_to_margin(self.span) { + if let Some(current_padding) = sm.span_to_margin(self.token.span) { for (brace, brace_span) in &self.open_braces { if let Some(padding) = sm.span_to_margin(*brace_span) { // high likelihood of these two corresponding @@ -126,7 +160,7 @@ impl<'a> StringReader<'a> { self.unmatched_braces.push(UnmatchedBrace { expected_delim: tok, found_delim: other, - found_span: self.span, + found_span: self.token.span, unclosed_span: unclosed_delimiter, candidate_span: candidate, }); @@ -164,20 +198,33 @@ impl<'a> StringReader<'a> { // matching opening delimiter). let token_str = token_to_string(&self.token); let msg = format!("unexpected close delimiter: `{}`", token_str); - let mut err = self.sess.span_diagnostic.struct_span_err(self.span, &msg); - err.span_label(self.span, "unexpected close delimiter"); + let mut err = self.string_reader.sess.span_diagnostic + .struct_span_err(self.token.span, &msg); + err.span_label(self.token.span, "unexpected close delimiter"); Err(err) }, _ => { - let tt = TokenTree::Token(self.span, self.token.clone()); - // Note that testing for joint-ness here is done via the raw - // source span as the joint-ness is a property of the raw source - // rather than wanting to take `override_span` into account. - let raw = self.span_src_raw; + let tt = TokenTree::Token(self.token.take()); self.real_token(); - let is_joint = raw.hi() == self.span_src_raw.lo() && token::is_op(&self.token); + let is_joint = self.joint_to_prev == Joint && self.token.is_op(); Ok((tt, if is_joint { Joint } else { NonJoint })) } } } + + fn real_token(&mut self) { + self.joint_to_prev = Joint; + loop { + let token = self.string_reader.next_token(); + match token.kind { + token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => { + self.joint_to_prev = NonJoint; + } + _ => { + self.token = token; + return; + }, + } + } + } } diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index 94ce6297fbe..eaa736c6a35 100644 --- a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -1,10 +1,12 @@ // Characters and their corresponding confusables were collected from // http://www.unicode.org/Public/security/10.0.0/confusables.txt -use syntax_pos::{Span, Pos, NO_EXPANSION}; -use errors::{Applicability, DiagnosticBuilder}; use super::StringReader; +use errors::{Applicability, DiagnosticBuilder}; +use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION, symbol::kw}; +use crate::parse::token; +#[rustfmt::skip] // for line breaks const UNICODE_ARRAY: &[(char, &str, char)] = &[ (' ', "Line Separator", ' '), (' ', "Paragraph Separator", ' '), @@ -49,7 +51,7 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[ ('─', "Box Drawings Light Horizontal", '-'), ('━', "Box Drawings Heavy Horizontal", '-'), ('㇐', "CJK Stroke H", '-'), - ('ꟷ', "Latin Epigraphic Letter Dideways", '-'), + ('ꟷ', "Latin Epigraphic Letter Sideways I", '-'), ('ᅳ', "Hangul Jungseong Eu", '-'), ('ㅡ', "Hangul Letter Eu", '-'), ('一', "CJK Unified Ideograph-4E00", '-'), @@ -293,74 +295,99 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[ ('〉', "Right-Pointing Angle Bracket", '>'), ('〉', "Right Angle Bracket", '>'), ('》', "Right Double Angle Bracket", '>'), - ('>', "Fullwidth Greater-Than Sign", '>'), ]; - - -const ASCII_ARRAY: &[(char, &str)] = &[ - (' ', "Space"), - ('_', "Underscore"), - ('-', "Minus/Hyphen"), - (',', "Comma"), - (';', "Semicolon"), - (':', "Colon"), - ('!', "Exclamation Mark"), - ('?', "Question Mark"), - ('.', "Period"), - ('\'', "Single Quote"), - ('"', "Quotation Mark"), - ('(', "Left Parenthesis"), - (')', "Right Parenthesis"), - ('[', "Left Square Bracket"), - (']', "Right Square Bracket"), - ('{', "Left Curly Brace"), - ('}', "Right Curly Brace"), - ('*', "Asterisk"), - ('/', "Slash"), - ('\\', "Backslash"), - ('&', "Ampersand"), - ('+', "Plus Sign"), - ('<', "Less-Than Sign"), - ('=', "Equals Sign"), - ('>', "Greater-Than Sign"), ]; - -crate fn check_for_substitution<'a>(reader: &StringReader<'a>, - ch: char, - err: &mut DiagnosticBuilder<'a>) -> bool { - UNICODE_ARRAY - .iter() - .find(|&&(c, _, _)| c == ch) - .map(|&(_, u_name, ascii_char)| { - let span = Span::new(reader.pos, reader.next_pos, NO_EXPANSION); - match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) { - Some(&(ascii_char, ascii_name)) => { - // special help suggestion for "directed" double quotes - if let Some(s) = reader.peek_delimited('“', '”') { - let msg = format!("Unicode characters '“' (Left Double Quotation Mark) and \ - '”' (Right Double Quotation Mark) look like '{}' ({}), but are not", - ascii_char, ascii_name); - err.span_suggestion( - Span::new(reader.pos, reader.next_pos + Pos::from_usize(s.len()) + - Pos::from_usize('”'.len_utf8()), NO_EXPANSION), - &msg, - format!("\"{}\"", s), - Applicability::MaybeIncorrect); - } else { - let msg = - format!("Unicode character '{}' ({}) looks like '{}' ({}), but it is not", - ch, u_name, ascii_char, ascii_name); - err.span_suggestion( - span, - &msg, - ascii_char.to_string(), - Applicability::MaybeIncorrect); - } - true - }, - None => { - let msg = format!("substitution character not found for '{}'", ch); - reader.sess.span_diagnostic.span_bug_no_panic(span, &msg); - false - } + ('>', "Fullwidth Greater-Than Sign", '>'), +]; + +// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of +// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`. +// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add +// fancier error recovery to it, as there will be less overall work to do this way. +const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[ + (' ', "Space", Some(token::Whitespace)), + ('_', "Underscore", Some(token::Ident(kw::Underscore, false))), + ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))), + (',', "Comma", Some(token::Comma)), + (';', "Semicolon", Some(token::Semi)), + (':', "Colon", Some(token::Colon)), + ('!', "Exclamation Mark", Some(token::Not)), + ('?', "Question Mark", Some(token::Question)), + ('.', "Period", Some(token::Dot)), + ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))), + (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))), + ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))), + (']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))), + ('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))), + ('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))), + ('*', "Asterisk", Some(token::BinOp(token::Star))), + ('/', "Slash", Some(token::BinOp(token::Slash))), + ('\\', "Backslash", None), + ('&', "Ampersand", Some(token::BinOp(token::And))), + ('+', "Plus Sign", Some(token::BinOp(token::Plus))), + ('<', "Less-Than Sign", Some(token::Lt)), + ('=', "Equals Sign", Some(token::Eq)), + ('>', "Greater-Than Sign", Some(token::Gt)), + // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by + // spitting the correct token out. + ('\'', "Single Quote", None), + ('"', "Quotation Mark", None), +]; + +crate fn check_for_substitution<'a>( + reader: &StringReader<'a>, + pos: BytePos, + ch: char, + err: &mut DiagnosticBuilder<'a>, +) -> Option<token::TokenKind> { + let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) { + Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char), + None => return None, + }; + + let span = Span::new(pos, pos + Pos::from_usize(ch.len_utf8()), NO_EXPANSION); + + let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) { + Some((_ascii_char, ascii_name, token)) => (ascii_name, token), + None => { + let msg = format!("substitution character not found for '{}'", ch); + reader.sess.span_diagnostic.span_bug_no_panic(span, &msg); + return None; } - }).unwrap_or(false) + }; + + // special help suggestion for "directed" double quotes + if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') { + let msg = format!( + "Unicode characters '“' (Left Double Quotation Mark) and \ + '”' (Right Double Quotation Mark) look like '{}' ({}), but are not", + ascii_char, ascii_name + ); + err.span_suggestion( + Span::new( + pos, + pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()), + NO_EXPANSION, + ), + &msg, + format!("\"{}\"", s), + Applicability::MaybeIncorrect, + ); + } else { + let msg = format!( + "Unicode character '{}' ({}) looks like '{}' ({}), but it is not", + ch, u_name, ascii_char, ascii_name + ); + err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect); + } + token.clone() +} + +/// Extract string if found at current position with given delimiters +fn peek_delimited(text: &str, from_ch: char, to_ch: char) -> Option<&str> { + let mut chars = text.chars(); + let first_char = chars.next()?; + if first_char != from_ch { + return None; + } + let last_char_idx = chars.as_str().find(to_ch)?; + Some(&chars.as_str()[..last_char_idx]) } diff --git a/src/libsyntax/parse/literal.rs b/src/libsyntax/parse/literal.rs new file mode 100644 index 00000000000..6409acba573 --- /dev/null +++ b/src/libsyntax/parse/literal.rs @@ -0,0 +1,470 @@ +//! Code related to parsing literals. + +use crate::ast::{self, Lit, LitKind}; +use crate::parse::parser::Parser; +use crate::parse::PResult; +use crate::parse::token::{self, Token, TokenKind}; +use crate::print::pprust; +use crate::symbol::{kw, sym, Symbol}; +use crate::tokenstream::{TokenStream, TokenTree}; + +use errors::{Applicability, Handler}; +use log::debug; +use rustc_data_structures::sync::Lrc; +use syntax_pos::Span; +use rustc_lexer::unescape::{unescape_char, unescape_byte}; +use rustc_lexer::unescape::{unescape_str, unescape_byte_str}; +use rustc_lexer::unescape::{unescape_raw_str, unescape_raw_byte_str}; + +use std::ascii; + +crate enum LitError { + NotLiteral, + LexerError, + InvalidSuffix, + InvalidIntSuffix, + InvalidFloatSuffix, + NonDecimalFloat(u32), + IntTooLarge, +} + +impl LitError { + fn report(&self, diag: &Handler, lit: token::Lit, span: Span) { + let token::Lit { kind, suffix, .. } = lit; + match *self { + // `NotLiteral` is not an error by itself, so we don't report + // it and give the parser opportunity to try something else. + LitError::NotLiteral => {} + // `LexerError` *is* an error, but it was already reported + // by lexer, so here we don't report it the second time. + LitError::LexerError => {} + LitError::InvalidSuffix => { + expect_no_suffix( + diag, span, &format!("{} {} literal", kind.article(), kind.descr()), suffix + ); + } + LitError::InvalidIntSuffix => { + let suf = suffix.expect("suffix error with no suffix").as_str(); + if looks_like_width_suffix(&['i', 'u'], &suf) { + // If it looks like a width, try to be helpful. + let msg = format!("invalid width `{}` for integer literal", &suf[1..]); + diag.struct_span_err(span, &msg) + .help("valid widths are 8, 16, 32, 64 and 128") + .emit(); + } else { + let msg = format!("invalid suffix `{}` for integer literal", suf); + diag.struct_span_err(span, &msg) + .span_label(span, format!("invalid suffix `{}`", suf)) + .help("the suffix must be one of the integral types (`u32`, `isize`, etc)") + .emit(); + } + } + LitError::InvalidFloatSuffix => { + let suf = suffix.expect("suffix error with no suffix").as_str(); + if looks_like_width_suffix(&['f'], &suf) { + // If it looks like a width, try to be helpful. + let msg = format!("invalid width `{}` for float literal", &suf[1..]); + diag.struct_span_err(span, &msg) + .help("valid widths are 32 and 64") + .emit(); + } else { + let msg = format!("invalid suffix `{}` for float literal", suf); + diag.struct_span_err(span, &msg) + .span_label(span, format!("invalid suffix `{}`", suf)) + .help("valid suffixes are `f32` and `f64`") + .emit(); + } + } + LitError::NonDecimalFloat(base) => { + let descr = match base { + 16 => "hexadecimal", + 8 => "octal", + 2 => "binary", + _ => unreachable!(), + }; + diag.struct_span_err(span, &format!("{} float literal is not supported", descr)) + .span_label(span, "not supported") + .emit(); + } + LitError::IntTooLarge => { + diag.struct_span_err(span, "integer literal is too large") + .emit(); + } + } + } +} + +impl LitKind { + /// Converts literal token into a semantic literal. + fn from_lit_token(lit: token::Lit) -> Result<LitKind, LitError> { + let token::Lit { kind, symbol, suffix } = lit; + if suffix.is_some() && !kind.may_have_suffix() { + return Err(LitError::InvalidSuffix); + } + + Ok(match kind { + token::Bool => { + assert!(symbol == kw::True || symbol == kw::False); + LitKind::Bool(symbol == kw::True) + } + token::Byte => return unescape_byte(&symbol.as_str()) + .map(LitKind::Byte).map_err(|_| LitError::LexerError), + token::Char => return unescape_char(&symbol.as_str()) + .map(LitKind::Char).map_err(|_| LitError::LexerError), + + // There are some valid suffixes for integer and float literals, + // so all the handling is done internally. + token::Integer => return integer_lit(symbol, suffix), + token::Float => return float_lit(symbol, suffix), + + token::Str => { + // If there are no characters requiring special treatment we can + // reuse the symbol from the token. Otherwise, we must generate a + // new symbol because the string in the LitKind is different to the + // string in the token. + let s = symbol.as_str(); + let symbol = if s.contains(&['\\', '\r'][..]) { + let mut buf = String::with_capacity(s.len()); + let mut error = Ok(()); + unescape_str(&s, &mut |_, unescaped_char| { + match unescaped_char { + Ok(c) => buf.push(c), + Err(_) => error = Err(LitError::LexerError), + } + }); + error?; + Symbol::intern(&buf) + } else { + symbol + }; + LitKind::Str(symbol, ast::StrStyle::Cooked) + } + token::StrRaw(n) => { + // Ditto. + let s = symbol.as_str(); + let symbol = if s.contains('\r') { + let mut buf = String::with_capacity(s.len()); + let mut error = Ok(()); + unescape_raw_str(&s, &mut |_, unescaped_char| { + match unescaped_char { + Ok(c) => buf.push(c), + Err(_) => error = Err(LitError::LexerError), + } + }); + error?; + buf.shrink_to_fit(); + Symbol::intern(&buf) + } else { + symbol + }; + LitKind::Str(symbol, ast::StrStyle::Raw(n)) + } + token::ByteStr => { + let s = symbol.as_str(); + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_byte_str(&s, &mut |_, unescaped_byte| { + match unescaped_byte { + Ok(c) => buf.push(c), + Err(_) => error = Err(LitError::LexerError), + } + }); + error?; + buf.shrink_to_fit(); + LitKind::ByteStr(Lrc::new(buf)) + } + token::ByteStrRaw(_) => { + let s = symbol.as_str(); + let bytes = if s.contains('\r') { + let mut buf = Vec::with_capacity(s.len()); + let mut error = Ok(()); + unescape_raw_byte_str(&s, &mut |_, unescaped_byte| { + match unescaped_byte { + Ok(c) => buf.push(c), + Err(_) => error = Err(LitError::LexerError), + } + }); + error?; + buf.shrink_to_fit(); + buf + } else { + symbol.to_string().into_bytes() + }; + + LitKind::ByteStr(Lrc::new(bytes)) + }, + token::Err => LitKind::Err(symbol), + }) + } + + /// Attempts to recover a token from semantic literal. + /// This function is used when the original token doesn't exist (e.g. the literal is created + /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). + pub fn to_lit_token(&self) -> token::Lit { + let (kind, symbol, suffix) = match *self { + LitKind::Str(symbol, ast::StrStyle::Cooked) => { + // Don't re-intern unless the escaped string is different. + let s = &symbol.as_str(); + let escaped = s.escape_default().to_string(); + let symbol = if escaped == *s { symbol } else { Symbol::intern(&escaped) }; + (token::Str, symbol, None) + } + LitKind::Str(symbol, ast::StrStyle::Raw(n)) => { + (token::StrRaw(n), symbol, None) + } + LitKind::ByteStr(ref bytes) => { + let string = bytes.iter().cloned().flat_map(ascii::escape_default) + .map(Into::<char>::into).collect::<String>(); + (token::ByteStr, Symbol::intern(&string), None) + } + LitKind::Byte(byte) => { + let string: String = ascii::escape_default(byte).map(Into::<char>::into).collect(); + (token::Byte, Symbol::intern(&string), None) + } + LitKind::Char(ch) => { + let string: String = ch.escape_default().map(Into::<char>::into).collect(); + (token::Char, Symbol::intern(&string), None) + } + LitKind::Int(n, ty) => { + let suffix = match ty { + ast::LitIntType::Unsigned(ty) => Some(ty.to_symbol()), + ast::LitIntType::Signed(ty) => Some(ty.to_symbol()), + ast::LitIntType::Unsuffixed => None, + }; + (token::Integer, sym::integer(n), suffix) + } + LitKind::Float(symbol, ty) => { + (token::Float, symbol, Some(ty.to_symbol())) + } + LitKind::FloatUnsuffixed(symbol) => { + (token::Float, symbol, None) + } + LitKind::Bool(value) => { + let symbol = if value { kw::True } else { kw::False }; + (token::Bool, symbol, None) + } + LitKind::Err(symbol) => { + (token::Err, symbol, None) + } + }; + + token::Lit::new(kind, symbol, suffix) + } +} + +impl Lit { + /// Converts literal token into an AST literal. + fn from_lit_token(token: token::Lit, span: Span) -> Result<Lit, LitError> { + Ok(Lit { token, node: LitKind::from_lit_token(token)?, span }) + } + + /// Converts arbitrary token into an AST literal. + crate fn from_token(token: &Token) -> Result<Lit, LitError> { + let lit = match token.kind { + token::Ident(name, false) if name == kw::True || name == kw::False => + token::Lit::new(token::Bool, name, None), + token::Literal(lit) => + lit, + token::Interpolated(ref nt) => { + if let token::NtExpr(expr) | token::NtLiteral(expr) = &**nt { + if let ast::ExprKind::Lit(lit) = &expr.node { + return Ok(lit.clone()); + } + } + return Err(LitError::NotLiteral); + } + _ => return Err(LitError::NotLiteral) + }; + + Lit::from_lit_token(lit, token.span) + } + + /// Attempts to recover an AST literal from semantic literal. + /// This function is used when the original token doesn't exist (e.g. the literal is created + /// by an AST-based macro) or unavailable (e.g. from HIR pretty-printing). + pub fn from_lit_kind(node: LitKind, span: Span) -> Lit { + Lit { token: node.to_lit_token(), node, span } + } + + /// Losslessly convert an AST literal into a token stream. + crate fn tokens(&self) -> TokenStream { + let token = match self.token.kind { + token::Bool => token::Ident(self.token.symbol, false), + _ => token::Literal(self.token), + }; + TokenTree::token(token, self.span).into() + } +} + +impl<'a> Parser<'a> { + /// Matches `lit = true | false | token_lit`. + crate fn parse_lit(&mut self) -> PResult<'a, Lit> { + let mut recovered = None; + if self.token == token::Dot { + // Attempt to recover `.4` as `0.4`. + recovered = self.look_ahead(1, |next_token| { + if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) + = next_token.kind { + if self.token.span.hi() == next_token.span.lo() { + let s = String::from("0.") + &symbol.as_str(); + let kind = TokenKind::lit(token::Float, Symbol::intern(&s), suffix); + return Some(Token::new(kind, self.token.span.to(next_token.span))); + } + } + None + }); + if let Some(token) = &recovered { + self.bump(); + self.diagnostic() + .struct_span_err(token.span, "float literals must have an integer part") + .span_suggestion( + token.span, + "must have an integer part", + pprust::token_to_string(token), + Applicability::MachineApplicable, + ) + .emit(); + } + } + + let token = recovered.as_ref().unwrap_or(&self.token); + match Lit::from_token(token) { + Ok(lit) => { + self.bump(); + Ok(lit) + } + Err(LitError::NotLiteral) => { + let msg = format!("unexpected token: {}", self.this_token_descr()); + Err(self.span_fatal(token.span, &msg)) + } + Err(err) => { + let (lit, span) = (token.expect_lit(), token.span); + self.bump(); + err.report(&self.sess.span_diagnostic, lit, span); + // Pack possible quotes and prefixes from the original literal into + // the error literal's symbol so they can be pretty-printed faithfully. + let suffixless_lit = token::Lit::new(lit.kind, lit.symbol, None); + let symbol = Symbol::intern(&suffixless_lit.to_string()); + let lit = token::Lit::new(token::Err, symbol, lit.suffix); + Lit::from_lit_token(lit, span).map_err(|_| unreachable!()) + } + } + } +} + +crate fn expect_no_suffix(diag: &Handler, sp: Span, kind: &str, suffix: Option<Symbol>) { + if let Some(suf) = suffix { + let mut err = if kind == "a tuple index" && + [sym::i32, sym::u32, sym::isize, sym::usize].contains(&suf) { + // #59553: warn instead of reject out of hand to allow the fix to percolate + // through the ecosystem when people fix their macros + let mut err = diag.struct_span_warn( + sp, + &format!("suffixes on {} are invalid", kind), + ); + err.note(&format!( + "`{}` is *temporarily* accepted on tuple index fields as it was \ + incorrectly accepted on stable for a few releases", + suf, + )); + err.help( + "on proc macros, you'll want to use `syn::Index::from` or \ + `proc_macro::Literal::*_unsuffixed` for code that will desugar \ + to tuple field access", + ); + err.note( + "for more context, see https://github.com/rust-lang/rust/issues/60210", + ); + err + } else { + diag.struct_span_err(sp, &format!("suffixes on {} are invalid", kind)) + }; + err.span_label(sp, format!("invalid suffix `{}`", suf)); + err.emit(); + } +} + +// Checks if `s` looks like i32 or u1234 etc. +fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { + s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) +} + +fn strip_underscores(symbol: Symbol) -> Symbol { + // Do not allocate a new string unless necessary. + let s = symbol.as_str(); + if s.contains('_') { + let mut s = s.to_string(); + s.retain(|c| c != '_'); + return Symbol::intern(&s); + } + symbol +} + +fn filtered_float_lit(symbol: Symbol, suffix: Option<Symbol>, base: u32) + -> Result<LitKind, LitError> { + debug!("filtered_float_lit: {:?}, {:?}, {:?}", symbol, suffix, base); + if base != 10 { + return Err(LitError::NonDecimalFloat(base)); + } + Ok(match suffix { + Some(suf) => match suf { + sym::f32 => LitKind::Float(symbol, ast::FloatTy::F32), + sym::f64 => LitKind::Float(symbol, ast::FloatTy::F64), + _ => return Err(LitError::InvalidFloatSuffix), + } + None => LitKind::FloatUnsuffixed(symbol) + }) +} + +fn float_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> { + debug!("float_lit: {:?}, {:?}", symbol, suffix); + filtered_float_lit(strip_underscores(symbol), suffix, 10) +} + +fn integer_lit(symbol: Symbol, suffix: Option<Symbol>) -> Result<LitKind, LitError> { + debug!("integer_lit: {:?}, {:?}", symbol, suffix); + let symbol = strip_underscores(symbol); + let s = symbol.as_str(); + + let mut base = 10; + if s.len() > 1 && s.as_bytes()[0] == b'0' { + match s.as_bytes()[1] { + b'x' => base = 16, + b'o' => base = 8, + b'b' => base = 2, + _ => {} + } + } + + let ty = match suffix { + Some(suf) => match suf { + sym::isize => ast::LitIntType::Signed(ast::IntTy::Isize), + sym::i8 => ast::LitIntType::Signed(ast::IntTy::I8), + sym::i16 => ast::LitIntType::Signed(ast::IntTy::I16), + sym::i32 => ast::LitIntType::Signed(ast::IntTy::I32), + sym::i64 => ast::LitIntType::Signed(ast::IntTy::I64), + sym::i128 => ast::LitIntType::Signed(ast::IntTy::I128), + sym::usize => ast::LitIntType::Unsigned(ast::UintTy::Usize), + sym::u8 => ast::LitIntType::Unsigned(ast::UintTy::U8), + sym::u16 => ast::LitIntType::Unsigned(ast::UintTy::U16), + sym::u32 => ast::LitIntType::Unsigned(ast::UintTy::U32), + sym::u64 => ast::LitIntType::Unsigned(ast::UintTy::U64), + sym::u128 => ast::LitIntType::Unsigned(ast::UintTy::U128), + // `1f64` and `2f32` etc. are valid float literals, and + // `fxxx` looks more like an invalid float literal than invalid integer literal. + _ if suf.as_str().starts_with('f') => return filtered_float_lit(symbol, suffix, base), + _ => return Err(LitError::InvalidIntSuffix), + } + _ => ast::LitIntType::Unsuffixed + }; + + let s = &s[if base != 10 { 2 } else { 0 } ..]; + u128::from_str_radix(s, base).map(|i| LitKind::Int(i, ty)).map_err(|_| { + // Small bases are lexed as if they were base 10, e.g, the string + // might be `0b10201`. This will cause the conversion above to fail, + // but these kinds of errors are already reported by the lexer. + let from_lexer = + base < 10 && s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); + if from_lexer { LitError::LexerError } else { LitError::IntTooLarge } + }) +} diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 371e8fe5cf6..80aa7a35266 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -5,39 +5,45 @@ use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId}; use crate::source_map::{SourceMap, FilePathMapping}; use crate::feature_gate::UnstableFeatures; use crate::parse::parser::Parser; -use crate::symbol::Symbol; -use crate::syntax::parse::parser::emit_unclosed_delims; +use crate::parse::parser::emit_unclosed_delims; +use crate::parse::token::TokenKind; use crate::tokenstream::{TokenStream, TokenTree}; use crate::diagnostics::plugin::ErrorMap; -use crate::print::pprust::token_to_string; +use crate::print::pprust; +use crate::symbol::Symbol; -use errors::{FatalError, Level, Handler, ColorConfig, Diagnostic, DiagnosticBuilder}; -use rustc_data_structures::sync::{Lrc, Lock}; +use errors::{Applicability, FatalError, Level, Handler, ColorConfig, Diagnostic, DiagnosticBuilder}; +use rustc_data_structures::sync::{Lrc, Lock, Once}; use syntax_pos::{Span, SourceFile, FileName, MultiSpan}; -use log::debug; +use syntax_pos::edition::Edition; -use rustc_data_structures::fx::FxHashSet; +use rustc_data_structures::fx::{FxHashSet, FxHashMap}; use std::borrow::Cow; -use std::iter; use std::path::{Path, PathBuf}; use std::str; -pub type PResult<'a, T> = Result<T, DiagnosticBuilder<'a>>; +#[cfg(test)] +mod tests; #[macro_use] pub mod parser; - +pub mod attr; pub mod lexer; pub mod token; -pub mod attr; -pub mod classify; +crate mod classify; +crate mod diagnostics; +crate mod literal; +crate mod unescape_error_reporting; + +pub type PResult<'a, T> = Result<T, DiagnosticBuilder<'a>>; /// Info about a parsing session. pub struct ParseSess { pub span_diagnostic: Handler, pub unstable_features: UnstableFeatures, pub config: CrateConfig, + pub edition: Edition, pub missing_fragment_specifiers: Lock<FxHashSet<Span>>, /// Places where raw identifiers were used. This is used for feature-gating raw identifiers. pub raw_identifier_spans: Lock<Vec<Span>>, @@ -47,6 +53,16 @@ pub struct ParseSess { included_mod_stack: Lock<Vec<PathBuf>>, source_map: Lrc<SourceMap>, pub buffered_lints: Lock<Vec<BufferedEarlyLint>>, + /// Contains the spans of block expressions that could have been incomplete based on the + /// operation token that followed it, but that the parser cannot identify without further + /// analysis. + pub ambiguous_block_expr_parse: Lock<FxHashMap<Span, Span>>, + pub param_attr_spans: Lock<Vec<Span>>, + // Places where `let` exprs were used and should be feature gated according to `let_chains`. + pub let_chains_spans: Lock<Vec<Span>>, + // Places where `async || ..` exprs were used and should be feature gated. + pub async_closure_spans: Lock<Vec<Span>>, + pub injected_crate_name: Once<Symbol>, } impl ParseSess { @@ -70,6 +86,12 @@ impl ParseSess { included_mod_stack: Lock::new(vec![]), source_map, buffered_lints: Lock::new(vec![]), + edition: Edition::from_session(), + ambiguous_block_expr_parse: Lock::new(FxHashMap::default()), + param_attr_spans: Lock::new(Vec::new()), + let_chains_spans: Lock::new(Vec::new()), + async_closure_spans: Lock::new(Vec::new()), + injected_crate_name: Once::new(), } } @@ -93,6 +115,24 @@ impl ParseSess { }); }); } + + /// Extend an error with a suggestion to wrap an expression with parentheses to allow the + /// parser to continue parsing the following operation as part of the same expression. + pub fn expr_parentheses_needed( + &self, + err: &mut DiagnosticBuilder<'_>, + span: Span, + alt_snippet: Option<String>, + ) { + if let Some(snippet) = self.source_map().span_to_snippet(span).ok().or(alt_snippet) { + err.span_suggestion( + span, + "parentheses are required to parse this as an expression", + format!("({})", snippet), + Applicability::MachineApplicable, + ); + } + } } #[derive(Clone)] @@ -210,10 +250,10 @@ fn maybe_source_file_to_parser( ) -> Result<Parser<'_>, Vec<Diagnostic>> { let end_pos = source_file.end_pos; let (stream, unclosed_delims) = maybe_file_to_stream(sess, source_file, None)?; - let mut parser = stream_to_parser(sess, stream); + let mut parser = stream_to_parser(sess, stream, None); parser.unclosed_delims = unclosed_delims; - if parser.token == token::Eof && parser.span.is_dummy() { - parser.span = Span::new(end_pos, end_pos, parser.span.ctxt()); + if parser.token == token::Eof && parser.token.span.is_dummy() { + parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt()); } Ok(parser) @@ -222,7 +262,7 @@ fn maybe_source_file_to_parser( // must preserve old name for now, because quote! from the *existing* // compiler expands into it pub fn new_parser_from_tts(sess: &ParseSess, tts: Vec<TokenTree>) -> Parser<'_> { - stream_to_parser(sess, tts.into_iter().collect()) + stream_to_parser(sess, tts.into_iter().collect(), crate::MACRO_ARGUMENTS) } @@ -267,25 +307,25 @@ pub fn source_file_to_stream( } /// Given a source file, produces a sequence of token trees. Returns any buffered errors from -/// parsing the token tream. +/// parsing the token stream. pub fn maybe_file_to_stream( sess: &ParseSess, source_file: Lrc<SourceFile>, override_span: Option<Span>, ) -> Result<(TokenStream, Vec<lexer::UnmatchedBrace>), Vec<Diagnostic>> { - let mut srdr = lexer::StringReader::new_or_buffered_errs(sess, source_file, override_span)?; - srdr.real_token(); + let srdr = lexer::StringReader::new(sess, source_file, override_span); + let (token_trees, unmatched_braces) = srdr.into_token_trees(); - match srdr.parse_all_token_trees() { - Ok(stream) => Ok((stream, srdr.unmatched_braces)), + match token_trees { + Ok(stream) => Ok((stream, unmatched_braces)), Err(err) => { let mut buffer = Vec::with_capacity(1); err.buffer(&mut buffer); // Not using `emit_unclosed_delims` to use `db.buffer` - for unmatched in srdr.unmatched_braces { + for unmatched in unmatched_braces { let mut db = sess.span_diagnostic.struct_span_err(unmatched.found_span, &format!( "incorrect close delimiter: `{}`", - token_to_string(&token::Token::CloseDelim(unmatched.found_delim)), + pprust::token_kind_to_string(&token::CloseDelim(unmatched.found_delim)), )); db.span_label(unmatched.found_span, "incorrect close delimiter"); if let Some(sp) = unmatched.candidate_span { @@ -302,477 +342,43 @@ pub fn maybe_file_to_stream( } /// Given stream and the `ParseSess`, produces a parser. -pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser<'_> { - Parser::new(sess, stream, None, true, false) -} - -/// Parses a string representing a character literal into its final form. -/// Rather than just accepting/rejecting a given literal, unescapes it as -/// well. Can take any slice prefixed by a character escape. Returns the -/// character and the number of characters consumed. -fn char_lit(lit: &str, diag: Option<(Span, &Handler)>) -> (char, isize) { - use std::char; - - // Handle non-escaped chars first. - if lit.as_bytes()[0] != b'\\' { - // If the first byte isn't '\\' it might part of a multi-byte char, so - // get the char with chars(). - let c = lit.chars().next().unwrap(); - return (c, 1); - } - - // Handle escaped chars. - match lit.as_bytes()[1] as char { - '"' => ('"', 2), - 'n' => ('\n', 2), - 'r' => ('\r', 2), - 't' => ('\t', 2), - '\\' => ('\\', 2), - '\'' => ('\'', 2), - '0' => ('\0', 2), - 'x' => { - let v = u32::from_str_radix(&lit[2..4], 16).unwrap(); - let c = char::from_u32(v).unwrap(); - (c, 4) - } - 'u' => { - assert_eq!(lit.as_bytes()[2], b'{'); - let idx = lit.find('}').unwrap(); - - // All digits and '_' are ascii, so treat each byte as a char. - let mut v: u32 = 0; - for c in lit[3..idx].bytes() { - let c = char::from(c); - if c != '_' { - let x = c.to_digit(16).unwrap(); - v = v.checked_mul(16).unwrap().checked_add(x).unwrap(); - } - } - let c = char::from_u32(v).unwrap_or_else(|| { - if let Some((span, diag)) = diag { - let mut diag = diag.struct_span_err(span, "invalid unicode character escape"); - if v > 0x10FFFF { - diag.help("unicode escape must be at most 10FFFF").emit(); - } else { - diag.help("unicode escape must not be a surrogate").emit(); - } - } - '\u{FFFD}' - }); - (c, (idx + 1) as isize) - } - _ => panic!("lexer should have rejected a bad character escape {}", lit) - } -} - -/// Parses a string representing a string literal into its final form. Does unescaping. -pub fn str_lit(lit: &str, diag: Option<(Span, &Handler)>) -> String { - debug!("str_lit: given {}", lit.escape_default()); - let mut res = String::with_capacity(lit.len()); - - let error = |i| format!("lexer should have rejected {} at {}", lit, i); - - /// Eat everything up to a non-whitespace. - fn eat<'a>(it: &mut iter::Peekable<str::CharIndices<'a>>) { - loop { - match it.peek().map(|x| x.1) { - Some(' ') | Some('\n') | Some('\r') | Some('\t') => { - it.next(); - }, - _ => { break; } - } - } - } - - let mut chars = lit.char_indices().peekable(); - while let Some((i, c)) = chars.next() { - match c { - '\\' => { - let ch = chars.peek().unwrap_or_else(|| { - panic!("{}", error(i)) - }).1; - - if ch == '\n' { - eat(&mut chars); - } else if ch == '\r' { - chars.next(); - let ch = chars.peek().unwrap_or_else(|| { - panic!("{}", error(i)) - }).1; - - if ch != '\n' { - panic!("lexer accepted bare CR"); - } - eat(&mut chars); - } else { - // otherwise, a normal escape - let (c, n) = char_lit(&lit[i..], diag); - for _ in 0..n - 1 { // we don't need to move past the first \ - chars.next(); - } - res.push(c); - } - }, - '\r' => { - let ch = chars.peek().unwrap_or_else(|| { - panic!("{}", error(i)) - }).1; - - if ch != '\n' { - panic!("lexer accepted bare CR"); - } - chars.next(); - res.push('\n'); - } - c => res.push(c), - } - } - - res.shrink_to_fit(); // probably not going to do anything, unless there was an escape. - debug!("parse_str_lit: returning {}", res); - res -} - -/// Parses a string representing a raw string literal into its final form. The -/// only operation this does is convert embedded CRLF into a single LF. -fn raw_str_lit(lit: &str) -> String { - debug!("raw_str_lit: given {}", lit.escape_default()); - let mut res = String::with_capacity(lit.len()); - - let mut chars = lit.chars().peekable(); - while let Some(c) = chars.next() { - if c == '\r' { - if *chars.peek().unwrap() != '\n' { - panic!("lexer accepted bare CR"); - } - chars.next(); - res.push('\n'); - } else { - res.push(c); - } - } - - res.shrink_to_fit(); - res -} - -// check if `s` looks like i32 or u1234 etc. -fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { - s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit()) -} - -macro_rules! err { - ($opt_diag:expr, |$span:ident, $diag:ident| $($body:tt)*) => { - match $opt_diag { - Some(($span, $diag)) => { $($body)* } - None => return None, - } - } -} - -crate fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Handler)>) - -> (bool /* suffix illegal? */, Option<ast::LitKind>) { - use ast::LitKind; - - match lit { - token::Byte(i) => (true, Some(LitKind::Byte(byte_lit(&i.as_str()).0))), - token::Char(i) => (true, Some(LitKind::Char(char_lit(&i.as_str(), diag).0))), - token::Err(i) => (true, Some(LitKind::Err(i))), - - // There are some valid suffixes for integer and float literals, - // so all the handling is done internally. - token::Integer(s) => (false, integer_lit(&s.as_str(), suf, diag)), - token::Float(s) => (false, float_lit(&s.as_str(), suf, diag)), - - token::Str_(mut sym) => { - // If there are no characters requiring special treatment we can - // reuse the symbol from the Token. Otherwise, we must generate a - // new symbol because the string in the LitKind is different to the - // string in the Token. - let s = &sym.as_str(); - if s.as_bytes().iter().any(|&c| c == b'\\' || c == b'\r') { - sym = Symbol::intern(&str_lit(s, diag)); - } - (true, Some(LitKind::Str(sym, ast::StrStyle::Cooked))) - } - token::StrRaw(mut sym, n) => { - // Ditto. - let s = &sym.as_str(); - if s.contains('\r') { - sym = Symbol::intern(&raw_str_lit(s)); - } - (true, Some(LitKind::Str(sym, ast::StrStyle::Raw(n)))) - } - token::ByteStr(i) => { - (true, Some(LitKind::ByteStr(byte_str_lit(&i.as_str())))) - } - token::ByteStrRaw(i, _) => { - (true, Some(LitKind::ByteStr(Lrc::new(i.to_string().into_bytes())))) - } - } -} - -fn filtered_float_lit(data: Symbol, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>) - -> Option<ast::LitKind> { - debug!("filtered_float_lit: {}, {:?}", data, suffix); - let suffix = match suffix { - Some(suffix) => suffix, - None => return Some(ast::LitKind::FloatUnsuffixed(data)), - }; - - Some(match &*suffix.as_str() { - "f32" => ast::LitKind::Float(data, ast::FloatTy::F32), - "f64" => ast::LitKind::Float(data, ast::FloatTy::F64), - suf => { - err!(diag, |span, diag| { - if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { - // if it looks like a width, lets try to be helpful. - let msg = format!("invalid width `{}` for float literal", &suf[1..]); - diag.struct_span_err(span, &msg).help("valid widths are 32 and 64").emit() - } else { - let msg = format!("invalid suffix `{}` for float literal", suf); - diag.struct_span_err(span, &msg) - .span_label(span, format!("invalid suffix `{}`", suf)) - .help("valid suffixes are `f32` and `f64`") - .emit(); - } - }); - - ast::LitKind::FloatUnsuffixed(data) - } - }) -} -fn float_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>) - -> Option<ast::LitKind> { - debug!("float_lit: {:?}, {:?}", s, suffix); - // FIXME #2252: bounds checking float literals is deferred until trans - - // Strip underscores without allocating a new String unless necessary. - let s2; - let s = if s.chars().any(|c| c == '_') { - s2 = s.chars().filter(|&c| c != '_').collect::<String>(); - &s2 - } else { - s - }; - - filtered_float_lit(Symbol::intern(s), suffix, diag) -} - -/// Parses a string representing a byte literal into its final form. Similar to `char_lit`. -fn byte_lit(lit: &str) -> (u8, usize) { - let err = |i| format!("lexer accepted invalid byte literal {} step {}", lit, i); - - if lit.len() == 1 { - (lit.as_bytes()[0], 1) - } else { - assert_eq!(lit.as_bytes()[0], b'\\', "{}", err(0)); - let b = match lit.as_bytes()[1] { - b'"' => b'"', - b'n' => b'\n', - b'r' => b'\r', - b't' => b'\t', - b'\\' => b'\\', - b'\'' => b'\'', - b'0' => b'\0', - _ => { - match u64::from_str_radix(&lit[2..4], 16).ok() { - Some(c) => - if c > 0xFF { - panic!(err(2)) - } else { - return (c as u8, 4) - }, - None => panic!(err(3)) - } - } - }; - (b, 2) - } -} - -fn byte_str_lit(lit: &str) -> Lrc<Vec<u8>> { - let mut res = Vec::with_capacity(lit.len()); - - let error = |i| panic!("lexer should have rejected {} at {}", lit, i); - - /// Eat everything up to a non-whitespace. - fn eat<I: Iterator<Item=(usize, u8)>>(it: &mut iter::Peekable<I>) { - loop { - match it.peek().map(|x| x.1) { - Some(b' ') | Some(b'\n') | Some(b'\r') | Some(b'\t') => { - it.next(); - }, - _ => { break; } - } - } - } - - // byte string literals *must* be ASCII, but the escapes don't have to be - let mut chars = lit.bytes().enumerate().peekable(); - loop { - match chars.next() { - Some((i, b'\\')) => { - match chars.peek().unwrap_or_else(|| error(i)).1 { - b'\n' => eat(&mut chars), - b'\r' => { - chars.next(); - if chars.peek().unwrap_or_else(|| error(i)).1 != b'\n' { - panic!("lexer accepted bare CR"); - } - eat(&mut chars); - } - _ => { - // otherwise, a normal escape - let (c, n) = byte_lit(&lit[i..]); - // we don't need to move past the first \ - for _ in 0..n - 1 { - chars.next(); - } - res.push(c); - } - } - }, - Some((i, b'\r')) => { - if chars.peek().unwrap_or_else(|| error(i)).1 != b'\n' { - panic!("lexer accepted bare CR"); - } - chars.next(); - res.push(b'\n'); - } - Some((_, c)) => res.push(c), - None => break, - } - } - - Lrc::new(res) -} - -fn integer_lit(s: &str, suffix: Option<Symbol>, diag: Option<(Span, &Handler)>) - -> Option<ast::LitKind> { - // s can only be ascii, byte indexing is fine - - // Strip underscores without allocating a new String unless necessary. - let s2; - let mut s = if s.chars().any(|c| c == '_') { - s2 = s.chars().filter(|&c| c != '_').collect::<String>(); - &s2 - } else { - s - }; - - debug!("integer_lit: {}, {:?}", s, suffix); - - let mut base = 10; - let orig = s; - let mut ty = ast::LitIntType::Unsuffixed; - - if s.starts_with('0') && s.len() > 1 { - match s.as_bytes()[1] { - b'x' => base = 16, - b'o' => base = 8, - b'b' => base = 2, - _ => { } - } - } - - // 1f64 and 2f32 etc. are valid float literals. - if let Some(suf) = suffix { - if looks_like_width_suffix(&['f'], &suf.as_str()) { - let err = match base { - 16 => Some("hexadecimal float literal is not supported"), - 8 => Some("octal float literal is not supported"), - 2 => Some("binary float literal is not supported"), - _ => None, - }; - if let Some(err) = err { - err!(diag, |span, diag| { - diag.struct_span_err(span, err) - .span_label(span, "not supported") - .emit(); - }); - } - return filtered_float_lit(Symbol::intern(s), Some(suf), diag) - } - } - - if base != 10 { - s = &s[2..]; - } - - if let Some(suf) = suffix { - if suf.as_str().is_empty() { - err!(diag, |span, diag| diag.span_bug(span, "found empty literal suffix in Some")); - } - ty = match &*suf.as_str() { - "isize" => ast::LitIntType::Signed(ast::IntTy::Isize), - "i8" => ast::LitIntType::Signed(ast::IntTy::I8), - "i16" => ast::LitIntType::Signed(ast::IntTy::I16), - "i32" => ast::LitIntType::Signed(ast::IntTy::I32), - "i64" => ast::LitIntType::Signed(ast::IntTy::I64), - "i128" => ast::LitIntType::Signed(ast::IntTy::I128), - "usize" => ast::LitIntType::Unsigned(ast::UintTy::Usize), - "u8" => ast::LitIntType::Unsigned(ast::UintTy::U8), - "u16" => ast::LitIntType::Unsigned(ast::UintTy::U16), - "u32" => ast::LitIntType::Unsigned(ast::UintTy::U32), - "u64" => ast::LitIntType::Unsigned(ast::UintTy::U64), - "u128" => ast::LitIntType::Unsigned(ast::UintTy::U128), - suf => { - // i<digits> and u<digits> look like widths, so lets - // give an error message along those lines - err!(diag, |span, diag| { - if looks_like_width_suffix(&['i', 'u'], suf) { - let msg = format!("invalid width `{}` for integer literal", &suf[1..]); - diag.struct_span_err(span, &msg) - .help("valid widths are 8, 16, 32, 64 and 128") - .emit(); - } else { - let msg = format!("invalid suffix `{}` for numeric literal", suf); - diag.struct_span_err(span, &msg) - .span_label(span, format!("invalid suffix `{}`", suf)) - .help("the suffix must be one of the integral types \ - (`u32`, `isize`, etc)") - .emit(); - } - }); - - ty - } - } - } - - debug!("integer_lit: the type is {:?}, base {:?}, the new string is {:?}, the original \ - string was {:?}, the original suffix was {:?}", ty, base, s, orig, suffix); - - Some(match u128::from_str_radix(s, base) { - Ok(r) => ast::LitKind::Int(r, ty), - Err(_) => { - // small bases are lexed as if they were base 10, e.g, the string - // might be `0b10201`. This will cause the conversion above to fail, - // but these cases have errors in the lexer: we don't want to emit - // two errors, and we especially don't want to emit this error since - // it isn't necessarily true. - let already_errored = base < 10 && - s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base)); - - if !already_errored { - err!(diag, |span, diag| diag.span_err(span, "int literal is too large")); - } - ast::LitKind::Int(0, ty) - } - }) +pub fn stream_to_parser<'a>( + sess: &'a ParseSess, + stream: TokenStream, + subparser_name: Option<&'static str>, +) -> Parser<'a> { + Parser::new(sess, stream, None, true, false, subparser_name) +} + +/// Given stream, the `ParseSess` and the base directory, produces a parser. +/// +/// Use this function when you are creating a parser from the token stream +/// and also care about the current working directory of the parser (e.g., +/// you are trying to resolve modules defined inside a macro invocation). +/// +/// # Note +/// +/// The main usage of this function is outside of rustc, for those who uses +/// libsyntax as a library. Please do not remove this function while refactoring +/// just because it is not used in rustc codebase! +pub fn stream_to_parser_with_base_dir<'a>( + sess: &'a ParseSess, + stream: TokenStream, + base_dir: Directory<'a>, +) -> Parser<'a> { + Parser::new(sess, stream, Some(base_dir), true, false, None) } /// A sequence separator. pub struct SeqSep { - /// The seperator token. - pub sep: Option<token::Token>, + /// The separator token. + pub sep: Option<TokenKind>, /// `true` if a trailing separator is allowed. pub trailing_sep_allowed: bool, } impl SeqSep { - pub fn trailing_allowed(t: token::Token) -> SeqSep { + pub fn trailing_allowed(t: TokenKind) -> SeqSep { SeqSep { sep: Some(t), trailing_sep_allowed: true, @@ -786,300 +392,3 @@ impl SeqSep { } } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::ast::{self, Ident, PatKind}; - use crate::attr::first_attr_value_str_by_name; - use crate::ptr::P; - use crate::print::pprust::item_to_string; - use crate::tokenstream::{DelimSpan, TokenTree}; - use crate::util::parser_testing::string_to_stream; - use crate::util::parser_testing::{string_to_expr, string_to_item}; - use crate::with_globals; - use syntax_pos::{Span, BytePos, Pos, NO_EXPANSION}; - - /// Parses an item. - /// - /// Returns `Ok(Some(item))` when successful, `Ok(None)` when no item was found, and `Err` - /// when a syntax error occurred. - fn parse_item_from_source_str(name: FileName, source: String, sess: &ParseSess) - -> PResult<'_, Option<P<ast::Item>>> { - new_parser_from_source_str(sess, name, source).parse_item() - } - - // produce a syntax_pos::span - fn sp(a: u32, b: u32) -> Span { - Span::new(BytePos(a), BytePos(b), NO_EXPANSION) - } - - #[should_panic] - #[test] fn bad_path_expr_1() { - with_globals(|| { - string_to_expr("::abc::def::return".to_string()); - }) - } - - // check the token-tree-ization of macros - #[test] - fn string_to_tts_macro () { - with_globals(|| { - let tts: Vec<_> = - string_to_stream("macro_rules! zip (($a)=>($a))".to_string()).trees().collect(); - let tts: &[TokenTree] = &tts[..]; - - match (tts.len(), tts.get(0), tts.get(1), tts.get(2), tts.get(3)) { - ( - 4, - Some(&TokenTree::Token(_, token::Ident(name_macro_rules, false))), - Some(&TokenTree::Token(_, token::Not)), - Some(&TokenTree::Token(_, token::Ident(name_zip, false))), - Some(&TokenTree::Delimited(_, macro_delim, ref macro_tts)), - ) - if name_macro_rules.name == "macro_rules" - && name_zip.name == "zip" => { - let tts = ¯o_tts.trees().collect::<Vec<_>>(); - match (tts.len(), tts.get(0), tts.get(1), tts.get(2)) { - ( - 3, - Some(&TokenTree::Delimited(_, first_delim, ref first_tts)), - Some(&TokenTree::Token(_, token::FatArrow)), - Some(&TokenTree::Delimited(_, second_delim, ref second_tts)), - ) - if macro_delim == token::Paren => { - let tts = &first_tts.trees().collect::<Vec<_>>(); - match (tts.len(), tts.get(0), tts.get(1)) { - ( - 2, - Some(&TokenTree::Token(_, token::Dollar)), - Some(&TokenTree::Token(_, token::Ident(ident, false))), - ) - if first_delim == token::Paren && ident.name == "a" => {}, - _ => panic!("value 3: {:?} {:?}", first_delim, first_tts), - } - let tts = &second_tts.trees().collect::<Vec<_>>(); - match (tts.len(), tts.get(0), tts.get(1)) { - ( - 2, - Some(&TokenTree::Token(_, token::Dollar)), - Some(&TokenTree::Token(_, token::Ident(ident, false))), - ) - if second_delim == token::Paren && ident.name == "a" => {}, - _ => panic!("value 4: {:?} {:?}", second_delim, second_tts), - } - }, - _ => panic!("value 2: {:?} {:?}", macro_delim, macro_tts), - } - }, - _ => panic!("value: {:?}",tts), - } - }) - } - - #[test] - fn string_to_tts_1() { - with_globals(|| { - let tts = string_to_stream("fn a (b : i32) { b; }".to_string()); - - let expected = TokenStream::new(vec![ - TokenTree::Token(sp(0, 2), token::Ident(Ident::from_str("fn"), false)).into(), - TokenTree::Token(sp(3, 4), token::Ident(Ident::from_str("a"), false)).into(), - TokenTree::Delimited( - DelimSpan::from_pair(sp(5, 6), sp(13, 14)), - token::DelimToken::Paren, - TokenStream::new(vec![ - TokenTree::Token(sp(6, 7), - token::Ident(Ident::from_str("b"), false)).into(), - TokenTree::Token(sp(8, 9), token::Colon).into(), - TokenTree::Token(sp(10, 13), - token::Ident(Ident::from_str("i32"), false)).into(), - ]).into(), - ).into(), - TokenTree::Delimited( - DelimSpan::from_pair(sp(15, 16), sp(20, 21)), - token::DelimToken::Brace, - TokenStream::new(vec![ - TokenTree::Token(sp(17, 18), - token::Ident(Ident::from_str("b"), false)).into(), - TokenTree::Token(sp(18, 19), token::Semi).into(), - ]).into(), - ).into() - ]); - - assert_eq!(tts, expected); - }) - } - - #[test] fn parse_use() { - with_globals(|| { - let use_s = "use foo::bar::baz;"; - let vitem = string_to_item(use_s.to_string()).unwrap(); - let vitem_s = item_to_string(&vitem); - assert_eq!(&vitem_s[..], use_s); - - let use_s = "use foo::bar as baz;"; - let vitem = string_to_item(use_s.to_string()).unwrap(); - let vitem_s = item_to_string(&vitem); - assert_eq!(&vitem_s[..], use_s); - }) - } - - #[test] fn parse_extern_crate() { - with_globals(|| { - let ex_s = "extern crate foo;"; - let vitem = string_to_item(ex_s.to_string()).unwrap(); - let vitem_s = item_to_string(&vitem); - assert_eq!(&vitem_s[..], ex_s); - - let ex_s = "extern crate foo as bar;"; - let vitem = string_to_item(ex_s.to_string()).unwrap(); - let vitem_s = item_to_string(&vitem); - assert_eq!(&vitem_s[..], ex_s); - }) - } - - fn get_spans_of_pat_idents(src: &str) -> Vec<Span> { - let item = string_to_item(src.to_string()).unwrap(); - - struct PatIdentVisitor { - spans: Vec<Span> - } - impl<'a> crate::visit::Visitor<'a> for PatIdentVisitor { - fn visit_pat(&mut self, p: &'a ast::Pat) { - match p.node { - PatKind::Ident(_ , ref spannedident, _) => { - self.spans.push(spannedident.span.clone()); - } - _ => { - crate::visit::walk_pat(self, p); - } - } - } - } - let mut v = PatIdentVisitor { spans: Vec::new() }; - crate::visit::walk_item(&mut v, &item); - return v.spans; - } - - #[test] fn span_of_self_arg_pat_idents_are_correct() { - with_globals(|| { - - let srcs = ["impl z { fn a (&self, &myarg: i32) {} }", - "impl z { fn a (&mut self, &myarg: i32) {} }", - "impl z { fn a (&'a self, &myarg: i32) {} }", - "impl z { fn a (self, &myarg: i32) {} }", - "impl z { fn a (self: Foo, &myarg: i32) {} }", - ]; - - for &src in &srcs { - let spans = get_spans_of_pat_idents(src); - let (lo, hi) = (spans[0].lo(), spans[0].hi()); - assert!("self" == &src[lo.to_usize()..hi.to_usize()], - "\"{}\" != \"self\". src=\"{}\"", - &src[lo.to_usize()..hi.to_usize()], src) - } - }) - } - - #[test] fn parse_exprs () { - with_globals(|| { - // just make sure that they parse.... - string_to_expr("3 + 4".to_string()); - string_to_expr("a::z.froob(b,&(987+3))".to_string()); - }) - } - - #[test] fn attrs_fix_bug () { - with_globals(|| { - string_to_item("pub fn mk_file_writer(path: &Path, flags: &[FileFlag]) - -> Result<Box<Writer>, String> { - #[cfg(windows)] - fn wb() -> c_int { - (O_WRONLY | libc::consts::os::extra::O_BINARY) as c_int - } - - #[cfg(unix)] - fn wb() -> c_int { O_WRONLY as c_int } - - let mut fflags: c_int = wb(); -}".to_string()); - }) - } - - #[test] fn crlf_doc_comments() { - with_globals(|| { - let sess = ParseSess::new(FilePathMapping::empty()); - - let name_1 = FileName::Custom("crlf_source_1".to_string()); - let source = "/// doc comment\r\nfn foo() {}".to_string(); - let item = parse_item_from_source_str(name_1, source, &sess) - .unwrap().unwrap(); - let doc = first_attr_value_str_by_name(&item.attrs, "doc").unwrap(); - assert_eq!(doc, "/// doc comment"); - - let name_2 = FileName::Custom("crlf_source_2".to_string()); - let source = "/// doc comment\r\n/// line 2\r\nfn foo() {}".to_string(); - let item = parse_item_from_source_str(name_2, source, &sess) - .unwrap().unwrap(); - let docs = item.attrs.iter().filter(|a| a.path == "doc") - .map(|a| a.value_str().unwrap().to_string()).collect::<Vec<_>>(); - let b: &[_] = &["/// doc comment".to_string(), "/// line 2".to_string()]; - assert_eq!(&docs[..], b); - - let name_3 = FileName::Custom("clrf_source_3".to_string()); - let source = "/** doc comment\r\n * with CRLF */\r\nfn foo() {}".to_string(); - let item = parse_item_from_source_str(name_3, source, &sess).unwrap().unwrap(); - let doc = first_attr_value_str_by_name(&item.attrs, "doc").unwrap(); - assert_eq!(doc, "/** doc comment\n * with CRLF */"); - }); - } - - #[test] - fn ttdelim_span() { - fn parse_expr_from_source_str( - name: FileName, source: String, sess: &ParseSess - ) -> PResult<'_, P<ast::Expr>> { - new_parser_from_source_str(sess, name, source).parse_expr() - } - - with_globals(|| { - let sess = ParseSess::new(FilePathMapping::empty()); - let expr = parse_expr_from_source_str(PathBuf::from("foo").into(), - "foo!( fn main() { body } )".to_string(), &sess).unwrap(); - - let tts: Vec<_> = match expr.node { - ast::ExprKind::Mac(ref mac) => mac.node.stream().trees().collect(), - _ => panic!("not a macro"), - }; - - let span = tts.iter().rev().next().unwrap().span(); - - match sess.source_map().span_to_snippet(span) { - Ok(s) => assert_eq!(&s[..], "{ body }"), - Err(_) => panic!("could not get snippet"), - } - }); - } - - // This tests that when parsing a string (rather than a file) we don't try - // and read in a file for a module declaration and just parse a stub. - // See `recurse_into_file_modules` in the parser. - #[test] - fn out_of_line_mod() { - with_globals(|| { - let sess = ParseSess::new(FilePathMapping::empty()); - let item = parse_item_from_source_str( - PathBuf::from("foo").into(), - "mod foo { struct S; mod this_does_not_exist; }".to_owned(), - &sess, - ).unwrap().unwrap(); - - if let ast::ItemKind::Mod(ref m) = item.node { - assert!(m.items.len() == 2); - } else { - panic!(); - } - }); - } -} diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index fe31311094b..d85c2df16a3 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -1,7 +1,9 @@ +// ignore-tidy-filelength + use crate::ast::{AngleBracketedArgs, ParenthesizedArgs, AttrStyle, BareFnTy}; use crate::ast::{GenericBound, TraitBoundModifier}; use crate::ast::Unsafety; -use crate::ast::{Mod, AnonConst, Arg, Arm, Guard, Attribute, BindingMode, TraitItemKind}; +use crate::ast::{Mod, AnonConst, Arg, Arm, Attribute, BindingMode, TraitItemKind}; use crate::ast::Block; use crate::ast::{BlockCheckMode, CaptureBy, Movability}; use crate::ast::{Constness, Crate}; @@ -13,7 +15,7 @@ use crate::ast::{ForeignItem, ForeignItemKind, FunctionRetTy}; use crate::ast::{GenericParam, GenericParamKind}; use crate::ast::GenericArg; use crate::ast::{Ident, ImplItem, IsAsync, IsAuto, Item, ItemKind}; -use crate::ast::{Label, Lifetime, Lit, LitKind}; +use crate::ast::{Label, Lifetime}; use crate::ast::Local; use crate::ast::MacStmtStyle; use crate::ast::{Mac, Mac_, MacDelimiter}; @@ -25,31 +27,33 @@ use crate::ast::{VariantData, StructField}; use crate::ast::StrStyle; use crate::ast::SelfKind; use crate::ast::{TraitItem, TraitRef, TraitObjectSyntax}; -use crate::ast::{Ty, TyKind, TypeBinding, GenericBounds}; +use crate::ast::{Ty, TyKind, AssocTyConstraint, AssocTyConstraintKind, GenericBounds}; use crate::ast::{Visibility, VisibilityKind, WhereClause, CrateSugar}; use crate::ast::{UseTree, UseTreeKind}; use crate::ast::{BinOpKind, UnOp}; use crate::ast::{RangeEnd, RangeSyntax}; use crate::{ast, attr}; use crate::ext::base::DummyResult; +use crate::ext::hygiene::SyntaxContext; use crate::source_map::{self, SourceMap, Spanned, respan}; -use crate::parse::{self, SeqSep, classify, token}; -use crate::parse::lexer::{TokenAndSpan, UnmatchedBrace}; +use crate::parse::{SeqSep, classify, literal, token}; +use crate::parse::lexer::UnmatchedBrace; use crate::parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration}; -use crate::parse::token::DelimToken; +use crate::parse::token::{Token, TokenKind, DelimToken}; use crate::parse::{new_sub_parser_from_file, ParseSess, Directory, DirectoryOwnership}; -use crate::util::parser::{AssocOp, Fixity}; +use crate::util::parser::{AssocOp, Fixity, prec_let_scrutinee_needs_par}; use crate::print::pprust; use crate::ptr::P; use crate::parse::PResult; use crate::ThinVec; use crate::tokenstream::{self, DelimSpan, TokenTree, TokenStream, TreeAndJoint}; -use crate::symbol::{Symbol, keywords}; +use crate::symbol::{kw, sym, Symbol}; +use crate::parse::diagnostics::{Error, dummy_arg}; use errors::{Applicability, DiagnosticBuilder, DiagnosticId, FatalError}; use rustc_target::spec::abi::{self, Abi}; -use syntax_pos::{Span, MultiSpan, BytePos, FileName}; -use log::{debug, trace}; +use syntax_pos::{Span, BytePos, DUMMY_SP, FileName}; +use log::debug; use std::borrow::Cow; use std::cmp; @@ -58,12 +62,12 @@ use std::path::{self, Path, PathBuf}; use std::slice; #[derive(Debug)] -/// Whether the type alias or associated type is a concrete type or an existential type +/// Whether the type alias or associated type is a concrete type or an opaque type pub enum AliasKind { /// Just a new name for the same type Weak(P<Ty>), /// Only trait impls of the type will be usable, not the actual type itself - Existential(GenericBounds), + OpaqueTy(GenericBounds), } bitflags::bitflags! { @@ -99,14 +103,14 @@ pub enum PathStyle { } #[derive(Clone, Copy, PartialEq, Debug)] -enum SemiColonMode { +crate enum SemiColonMode { Break, Ignore, Comma, } #[derive(Clone, Copy, PartialEq, Debug)] -enum BlockMode { +crate enum BlockMode { Break, Ignore, } @@ -118,24 +122,28 @@ enum BlockMode { /// `token::Interpolated` tokens. macro_rules! maybe_whole_expr { ($p:expr) => { - if let token::Interpolated(nt) = $p.token.clone() { - match *nt { - token::NtExpr(ref e) | token::NtLiteral(ref e) => { + if let token::Interpolated(nt) = &$p.token.kind { + match &**nt { + token::NtExpr(e) | token::NtLiteral(e) => { + let e = e.clone(); $p.bump(); - return Ok((*e).clone()); + return Ok(e); } - token::NtPath(ref path) => { + token::NtPath(path) => { + let path = path.clone(); $p.bump(); - let span = $p.span; - let kind = ExprKind::Path(None, (*path).clone()); - return Ok($p.mk_expr(span, kind, ThinVec::new())); + return Ok($p.mk_expr( + $p.token.span, ExprKind::Path(None, path), ThinVec::new() + )); } - token::NtBlock(ref block) => { + token::NtBlock(block) => { + let block = block.clone(); $p.bump(); - let span = $p.span; - let kind = ExprKind::Block((*block).clone(), None); - return Ok($p.mk_expr(span, kind, ThinVec::new())); + return Ok($p.mk_expr( + $p.token.span, ExprKind::Block(block, None), ThinVec::new() + )); } + // N.B: `NtIdent(ident)` is normalized to `Ident` in `fn bump`. _ => {}, }; } @@ -145,8 +153,9 @@ macro_rules! maybe_whole_expr { /// As maybe_whole_expr, but for things other than expressions macro_rules! maybe_whole { ($p:expr, $constructor:ident, |$x:ident| $e:expr) => { - if let token::Interpolated(nt) = $p.token.clone() { - if let token::$constructor($x) = (*nt).clone() { + if let token::Interpolated(nt) = &$p.token.kind { + if let token::$constructor(x) = &**nt { + let $x = x.clone(); $p.bump(); return Ok($e); } @@ -154,6 +163,21 @@ macro_rules! maybe_whole { }; } +/// If the next tokens are ill-formed `$ty::` recover them as `<$ty>::`. +macro_rules! maybe_recover_from_interpolated_ty_qpath { + ($self: expr, $allow_qpath_recovery: expr) => { + if $allow_qpath_recovery && $self.look_ahead(1, |t| t == &token::ModSep) { + if let token::Interpolated(nt) = &$self.token.kind { + if let token::NtTy(ty) = &**nt { + let ty = ty.clone(); + $self.bump(); + return $self.maybe_recover_from_bad_qpath_stage_2($self.prev_span, ty); + } + } + } + } +} + fn maybe_append(mut lhs: Vec<Attribute>, mut rhs: Option<Vec<Attribute>>) -> Vec<Attribute> { if let Some(ref mut rhs) = rhs { lhs.append(rhs); @@ -169,81 +193,40 @@ enum PrevTokenKind { Interpolated, Eof, Ident, + BitOr, Other, } -trait RecoverQPath: Sized { - const PATH_STYLE: PathStyle = PathStyle::Expr; - fn to_ty(&self) -> Option<P<Ty>>; - fn to_recovered(&self, qself: Option<QSelf>, path: ast::Path) -> Self; - fn to_string(&self) -> String; -} - -impl RecoverQPath for Ty { - const PATH_STYLE: PathStyle = PathStyle::Type; - fn to_ty(&self) -> Option<P<Ty>> { - Some(P(self.clone())) - } - fn to_recovered(&self, qself: Option<QSelf>, path: ast::Path) -> Self { - Self { span: path.span, node: TyKind::Path(qself, path), id: self.id } - } - fn to_string(&self) -> String { - pprust::ty_to_string(self) - } -} - -impl RecoverQPath for Pat { - fn to_ty(&self) -> Option<P<Ty>> { - self.to_ty() - } - fn to_recovered(&self, qself: Option<QSelf>, path: ast::Path) -> Self { - Self { span: path.span, node: PatKind::Path(qself, path), id: self.id } - } - fn to_string(&self) -> String { - pprust::pat_to_string(self) - } -} - -impl RecoverQPath for Expr { - fn to_ty(&self) -> Option<P<Ty>> { - self.to_ty() - } - fn to_recovered(&self, qself: Option<QSelf>, path: ast::Path) -> Self { - Self { span: path.span, node: ExprKind::Path(qself, path), - id: self.id, attrs: self.attrs.clone() } - } - fn to_string(&self) -> String { - pprust::expr_to_string(self) - } -} - -/* ident is handled by common.rs */ +// NOTE: `Ident`s are handled by `common.rs`. #[derive(Clone)] pub struct Parser<'a> { pub sess: &'a ParseSess, - /// the current token: - pub token: token::Token, - /// the span of the current token: - pub span: Span, - /// the span of the previous token: + /// The current normalized token. + /// "Normalized" means that some interpolated tokens + /// (`$i: ident` and `$l: lifetime` meta-variables) are replaced + /// with non-interpolated identifier and lifetime tokens they refer to. + /// Perhaps the normalized / non-normalized setup can be simplified somehow. + pub token: Token, + /// Span of the current non-normalized token. meta_var_span: Option<Span>, + /// Span of the previous non-normalized token. pub prev_span: Span, - /// the previous token kind + /// Kind of the previous normalized token (in simplified form). prev_token_kind: PrevTokenKind, restrictions: Restrictions, - /// Used to determine the path to externally loaded source files + /// Used to determine the path to externally loaded source files. crate directory: Directory<'a>, - /// Whether to parse sub-modules in other files. + /// `true` to parse sub-modules in other files. pub recurse_into_file_modules: bool, /// Name of the root module this parser originated from. If `None`, then the /// name is not known. This does not change while the parser is descending /// into modules, and sub-parsers have new values for this name. pub root_module_name: Option<String>, crate expected_tokens: Vec<TokenType>, - token_cursor: TokenCursor, + crate token_cursor: TokenCursor, desugar_doc_comments: bool, - /// Whether we should configure out of line modules as we parse. + /// `true` we should configure out of line modules as we parse. pub cfg_mods: bool, /// This field is used to keep track of how many left angle brackets we have seen. This is /// required in order to detect extra leading left angle brackets (`<` characters) and error @@ -256,7 +239,10 @@ pub struct Parser<'a> { /// it gets removed from here. Every entry left at the end gets emitted as an independent /// error. crate unclosed_delims: Vec<UnmatchedBrace>, - last_unexpected_token_span: Option<Span>, + crate last_unexpected_token_span: Option<Span>, + crate last_type_ascription: Option<(Span, bool /* likely path typo */)>, + /// If present, this `Parser` is not parsing Rust code but rather a macro call. + crate subparser_name: Option<&'static str>, } impl<'a> Drop for Parser<'a> { @@ -267,19 +253,19 @@ impl<'a> Drop for Parser<'a> { } #[derive(Clone)] -struct TokenCursor { - frame: TokenCursorFrame, - stack: Vec<TokenCursorFrame>, +crate struct TokenCursor { + crate frame: TokenCursorFrame, + crate stack: Vec<TokenCursorFrame>, } #[derive(Clone)] -struct TokenCursorFrame { - delim: token::DelimToken, - span: DelimSpan, - open_delim: bool, - tree_cursor: tokenstream::Cursor, - close_delim: bool, - last_token: LastToken, +crate struct TokenCursorFrame { + crate delim: token::DelimToken, + crate span: DelimSpan, + crate open_delim: bool, + crate tree_cursor: tokenstream::Cursor, + crate close_delim: bool, + crate last_token: LastToken, } /// This is used in `TokenCursorFrame` above to track tokens that are consumed @@ -300,16 +286,16 @@ struct TokenCursorFrame { /// You can find some more example usage of this in the `collect_tokens` method /// on the parser. #[derive(Clone)] -enum LastToken { +crate enum LastToken { Collecting(Vec<TreeAndJoint>), Was(Option<TreeAndJoint>), } impl TokenCursorFrame { - fn new(sp: DelimSpan, delim: DelimToken, tts: &TokenStream) -> Self { + fn new(span: DelimSpan, delim: DelimToken, tts: &TokenStream) -> Self { TokenCursorFrame { - delim: delim, - span: sp, + delim, + span, open_delim: delim == token::NoDelim, tree_cursor: tts.clone().into_trees(), close_delim: delim == token::NoDelim, @@ -319,7 +305,7 @@ impl TokenCursorFrame { } impl TokenCursor { - fn next(&mut self) -> TokenAndSpan { + fn next(&mut self) -> Token { loop { let tree = if !self.frame.open_delim { self.frame.open_delim = true; @@ -333,7 +319,7 @@ impl TokenCursor { self.frame = frame; continue } else { - return TokenAndSpan { tok: token::Eof, sp: syntax_pos::DUMMY_SP } + return Token::new(token::Eof, DUMMY_SP); }; match self.frame.last_token { @@ -342,7 +328,7 @@ impl TokenCursor { } match tree { - TokenTree::Token(sp, tok) => return TokenAndSpan { tok: tok, sp: sp }, + TokenTree::Token(token) => return token, TokenTree::Delimited(sp, delim, tts) => { let frame = TokenCursorFrame::new(sp, delim, &tts); self.stack.push(mem::replace(&mut self.frame, frame)); @@ -351,9 +337,9 @@ impl TokenCursor { } } - fn next_desugared(&mut self) -> TokenAndSpan { - let (sp, name) = match self.next() { - TokenAndSpan { sp, tok: token::DocComment(name) } => (sp, name), + fn next_desugared(&mut self) -> Token { + let (name, sp) = match self.next() { + Token { kind: token::DocComment(name), span } => (name, span), tok => return tok, }; @@ -376,10 +362,12 @@ impl TokenCursor { let body = TokenTree::Delimited( delim_span, token::Bracket, - [TokenTree::Token(sp, token::Ident(ast::Ident::from_str("doc"), false)), - TokenTree::Token(sp, token::Eq), - TokenTree::Token(sp, token::Literal( - token::StrRaw(Symbol::intern(&stripped), num_of_hashes), None)) + [ + TokenTree::token(token::Ident(sym::doc, false), sp), + TokenTree::token(token::Eq, sp), + TokenTree::token(TokenKind::lit( + token::StrRaw(num_of_hashes), Symbol::intern(&stripped), None + ), sp), ] .iter().cloned().collect::<TokenStream>().into(), ); @@ -388,10 +376,10 @@ impl TokenCursor { delim_span, token::NoDelim, &if doc_comment_style(&name.as_str()) == AttrStyle::Inner { - [TokenTree::Token(sp, token::Pound), TokenTree::Token(sp, token::Not), body] + [TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body] .iter().cloned().collect::<TokenStream>().into() } else { - [TokenTree::Token(sp, token::Pound), body] + [TokenTree::token(token::Pound, sp), body] .iter().cloned().collect::<TokenStream>().into() }, ))); @@ -402,8 +390,8 @@ impl TokenCursor { #[derive(Clone, PartialEq)] crate enum TokenType { - Token(token::Token), - Keyword(keywords::Keyword), + Token(TokenKind), + Keyword(Symbol), Operator, Lifetime, Ident, @@ -413,10 +401,10 @@ crate enum TokenType { } impl TokenType { - fn to_string(&self) -> String { + crate fn to_string(&self) -> String { match *self { - TokenType::Token(ref t) => format!("`{}`", pprust::token_to_string(t)), - TokenType::Keyword(kw) => format!("`{}`", kw.name()), + TokenType::Token(ref t) => format!("`{}`", pprust::token_kind_to_string(t)), + TokenType::Keyword(kw) => format!("`{}`", kw), TokenType::Operator => "an operator".to_string(), TokenType::Lifetime => "lifetime".to_string(), TokenType::Ident => "identifier".to_string(), @@ -432,7 +420,7 @@ impl TokenType { /// /// Types can also be of the form `IDENT(u8, u8) -> u8`, however this assumes /// that `IDENT` is not the ident of a fn trait. -fn can_continue_type_after_non_fn_ident(t: &token::Token) -> bool { +fn can_continue_type_after_non_fn_ident(t: &Token) -> bool { t == &token::ModSep || t == &token::Lt || t == &token::BinOp(token::Shl) } @@ -450,65 +438,6 @@ pub struct ModulePathSuccess { warn: bool, } -pub enum Error { - FileNotFoundForModule { - mod_name: String, - default_path: String, - secondary_path: String, - dir_path: String, - }, - DuplicatePaths { - mod_name: String, - default_path: String, - secondary_path: String, - }, - UselessDocComment, - InclusiveRangeWithNoEnd, -} - -impl Error { - fn span_err<S: Into<MultiSpan>>(self, - sp: S, - handler: &errors::Handler) -> DiagnosticBuilder<'_> { - match self { - Error::FileNotFoundForModule { ref mod_name, - ref default_path, - ref secondary_path, - ref dir_path } => { - let mut err = struct_span_err!(handler, sp, E0583, - "file not found for module `{}`", mod_name); - err.help(&format!("name the file either {} or {} inside the directory \"{}\"", - default_path, - secondary_path, - dir_path)); - err - } - Error::DuplicatePaths { ref mod_name, ref default_path, ref secondary_path } => { - let mut err = struct_span_err!(handler, sp, E0584, - "file for module `{}` found at both {} and {}", - mod_name, - default_path, - secondary_path); - err.help("delete or rename one of them to remove the ambiguity"); - err - } - Error::UselessDocComment => { - let mut err = struct_span_err!(handler, sp, E0585, - "found a documentation comment that doesn't document anything"); - err.help("doc comments must come before what they document, maybe a comment was \ - intended with `//`?"); - err - } - Error::InclusiveRangeWithNoEnd => { - let mut err = struct_span_err!(handler, sp, E0586, - "inclusive range with no end"); - err.help("inclusive ranges must be bounded at the end (`..=b` or `a..=b`)"); - err - } - } - } -} - #[derive(Debug)] enum LhsExpr { NotYetParsed, @@ -532,40 +461,25 @@ impl From<P<Expr>> for LhsExpr { } } -/// Creates a placeholder argument. -fn dummy_arg(span: Span) -> Arg { - let ident = Ident::new(keywords::Invalid.name(), span); - let pat = P(Pat { - id: ast::DUMMY_NODE_ID, - node: PatKind::Ident(BindingMode::ByValue(Mutability::Immutable), ident, None), - span, - }); - let ty = Ty { - node: TyKind::Err, - span, - id: ast::DUMMY_NODE_ID - }; - Arg { ty: P(ty), pat: pat, id: ast::DUMMY_NODE_ID } -} - #[derive(Copy, Clone, Debug)] -enum TokenExpectType { +crate enum TokenExpectType { Expect, NoExpect, } impl<'a> Parser<'a> { - pub fn new(sess: &'a ParseSess, - tokens: TokenStream, - directory: Option<Directory<'a>>, - recurse_into_file_modules: bool, - desugar_doc_comments: bool) - -> Self { + pub fn new( + sess: &'a ParseSess, + tokens: TokenStream, + directory: Option<Directory<'a>>, + recurse_into_file_modules: bool, + desugar_doc_comments: bool, + subparser_name: Option<&'static str>, + ) -> Self { let mut parser = Parser { sess, - token: token::Whitespace, - span: syntax_pos::DUMMY_SP, - prev_span: syntax_pos::DUMMY_SP, + token: Token::dummy(), + prev_span: DUMMY_SP, meta_var_span: None, prev_token_kind: PrevTokenKind::Other, restrictions: Restrictions::empty(), @@ -590,16 +504,17 @@ impl<'a> Parser<'a> { max_angle_bracket_count: 0, unclosed_delims: Vec::new(), last_unexpected_token_span: None, + last_type_ascription: None, + subparser_name, }; - let tok = parser.next_tok(); - parser.token = tok.tok; - parser.span = tok.sp; + parser.token = parser.next_tok(); if let Some(directory) = directory { parser.directory = directory; - } else if !parser.span.is_dummy() { - if let FileName::Real(mut path) = sess.source_map().span_to_unmapped_path(parser.span) { + } else if !parser.token.span.is_dummy() { + if let FileName::Real(mut path) = + sess.source_map().span_to_unmapped_path(parser.token.span) { path.pop(); parser.directory.path = Cow::from(path); } @@ -609,15 +524,15 @@ impl<'a> Parser<'a> { parser } - fn next_tok(&mut self) -> TokenAndSpan { + fn next_tok(&mut self) -> Token { let mut next = if self.desugar_doc_comments { self.token_cursor.next_desugared() } else { self.token_cursor.next() }; - if next.sp.is_dummy() { + if next.span.is_dummy() { // Tweak the location for better diagnostics, but keep syntactic context intact. - next.sp = self.prev_span.with_ctxt(next.sp.ctxt()); + next.span = self.prev_span.with_ctxt(next.span.ctxt()); } next } @@ -627,17 +542,17 @@ impl<'a> Parser<'a> { pprust::token_to_string(&self.token) } - fn token_descr(&self) -> Option<&'static str> { - Some(match &self.token { - t if t.is_special_ident() => "reserved identifier", - t if t.is_used_keyword() => "keyword", - t if t.is_unused_keyword() => "reserved keyword", + crate fn token_descr(&self) -> Option<&'static str> { + Some(match &self.token.kind { + _ if self.token.is_special_ident() => "reserved identifier", + _ if self.token.is_used_keyword() => "keyword", + _ if self.token.is_unused_keyword() => "reserved keyword", token::DocComment(..) => "doc comment", _ => return None, }) } - fn this_token_descr(&self) -> String { + crate fn this_token_descr(&self) -> String { if let Some(prefix) = self.token_descr() { format!("{} `{}`", prefix, self.this_token_to_string()) } else { @@ -645,11 +560,6 @@ impl<'a> Parser<'a> { } } - fn unexpected_last<T>(&self, t: &token::Token) -> PResult<'a, T> { - let token_str = pprust::token_to_string(t); - Err(self.span_fatal(self.prev_span, &format!("unexpected token: `{}`", token_str))) - } - crate fn unexpected<T>(&mut self) -> PResult<'a, T> { match self.expect_one_of(&[], &[]) { Err(e) => Err(e), @@ -658,229 +568,45 @@ impl<'a> Parser<'a> { } /// Expects and consumes the token `t`. Signals an error if the next token is not `t`. - pub fn expect(&mut self, t: &token::Token) -> PResult<'a, bool /* recovered */> { + pub fn expect(&mut self, t: &TokenKind) -> PResult<'a, bool /* recovered */> { if self.expected_tokens.is_empty() { if self.token == *t { self.bump(); Ok(false) } else { - let token_str = pprust::token_to_string(t); - let this_token_str = self.this_token_descr(); - let mut err = self.fatal(&format!("expected `{}`, found {}", - token_str, - this_token_str)); - - let sp = if self.token == token::Token::Eof { - // EOF, don't want to point at the following char, but rather the last token - self.prev_span - } else { - self.sess.source_map().next_point(self.prev_span) - }; - let label_exp = format!("expected `{}`", token_str); - match self.recover_closing_delimiter(&[t.clone()], err) { - Err(e) => err = e, - Ok(recovered) => { - return Ok(recovered); - } - } - let cm = self.sess.source_map(); - match (cm.lookup_line(self.span.lo()), cm.lookup_line(sp.lo())) { - (Ok(ref a), Ok(ref b)) if a.line == b.line => { - // When the spans are in the same line, it means that the only content - // between them is whitespace, point only at the found token. - err.span_label(self.span, label_exp); - } - _ => { - err.span_label(sp, label_exp); - err.span_label(self.span, "unexpected token"); - } - } - Err(err) + self.unexpected_try_recover(t) } } else { self.expect_one_of(slice::from_ref(t), &[]) } } - fn recover_closing_delimiter( - &mut self, - tokens: &[token::Token], - mut err: DiagnosticBuilder<'a>, - ) -> PResult<'a, bool> { - let mut pos = None; - // we want to use the last closing delim that would apply - for (i, unmatched) in self.unclosed_delims.iter().enumerate().rev() { - if tokens.contains(&token::CloseDelim(unmatched.expected_delim)) - && Some(self.span) > unmatched.unclosed_span - { - pos = Some(i); - } - } - match pos { - Some(pos) => { - // Recover and assume that the detected unclosed delimiter was meant for - // this location. Emit the diagnostic and act as if the delimiter was - // present for the parser's sake. - - // Don't attempt to recover from this unclosed delimiter more than once. - let unmatched = self.unclosed_delims.remove(pos); - let delim = TokenType::Token(token::CloseDelim(unmatched.expected_delim)); - - // We want to suggest the inclusion of the closing delimiter where it makes - // the most sense, which is immediately after the last token: - // - // {foo(bar {}} - // - ^ - // | | - // | help: `)` may belong here (FIXME: #58270) - // | - // unclosed delimiter - if let Some(sp) = unmatched.unclosed_span { - err.span_label(sp, "unclosed delimiter"); - } - err.span_suggestion_short( - self.sess.source_map().next_point(self.prev_span), - &format!("{} may belong here", delim.to_string()), - delim.to_string(), - Applicability::MaybeIncorrect, - ); - err.emit(); - self.expected_tokens.clear(); // reduce errors - Ok(true) - } - _ => Err(err), - } - } - /// Expect next token to be edible or inedible token. If edible, /// then consume it; if inedible, then return without consuming /// anything. Signal a fatal error if next token is unexpected. pub fn expect_one_of( &mut self, - edible: &[token::Token], - inedible: &[token::Token], + edible: &[TokenKind], + inedible: &[TokenKind], ) -> PResult<'a, bool /* recovered */> { - fn tokens_to_string(tokens: &[TokenType]) -> String { - let mut i = tokens.iter(); - // This might be a sign we need a connect method on Iterator. - let b = i.next() - .map_or(String::new(), |t| t.to_string()); - i.enumerate().fold(b, |mut b, (i, a)| { - if tokens.len() > 2 && i == tokens.len() - 2 { - b.push_str(", or "); - } else if tokens.len() == 2 && i == tokens.len() - 2 { - b.push_str(" or "); - } else { - b.push_str(", "); - } - b.push_str(&a.to_string()); - b - }) - } - if edible.contains(&self.token) { + if edible.contains(&self.token.kind) { self.bump(); Ok(false) - } else if inedible.contains(&self.token) { + } else if inedible.contains(&self.token.kind) { // leave it in the input Ok(false) - } else if self.last_unexpected_token_span == Some(self.span) { + } else if self.last_unexpected_token_span == Some(self.token.span) { FatalError.raise(); } else { - let mut expected = edible.iter() - .map(|x| TokenType::Token(x.clone())) - .chain(inedible.iter().map(|x| TokenType::Token(x.clone()))) - .chain(self.expected_tokens.iter().cloned()) - .collect::<Vec<_>>(); - expected.sort_by_cached_key(|x| x.to_string()); - expected.dedup(); - let expect = tokens_to_string(&expected[..]); - let actual = self.this_token_to_string(); - let (msg_exp, (label_sp, label_exp)) = if expected.len() > 1 { - let short_expect = if expected.len() > 6 { - format!("{} possible tokens", expected.len()) - } else { - expect.clone() - }; - (format!("expected one of {}, found `{}`", expect, actual), - (self.sess.source_map().next_point(self.prev_span), - format!("expected one of {} here", short_expect))) - } else if expected.is_empty() { - (format!("unexpected token: `{}`", actual), - (self.prev_span, "unexpected token after this".to_string())) - } else { - (format!("expected {}, found `{}`", expect, actual), - (self.sess.source_map().next_point(self.prev_span), - format!("expected {} here", expect))) - }; - self.last_unexpected_token_span = Some(self.span); - let mut err = self.fatal(&msg_exp); - if self.token.is_ident_named("and") { - err.span_suggestion_short( - self.span, - "use `&&` instead of `and` for the boolean operator", - "&&".to_string(), - Applicability::MaybeIncorrect, - ); - } - if self.token.is_ident_named("or") { - err.span_suggestion_short( - self.span, - "use `||` instead of `or` for the boolean operator", - "||".to_string(), - Applicability::MaybeIncorrect, - ); - } - let sp = if self.token == token::Token::Eof { - // This is EOF, don't want to point at the following char, but rather the last token - self.prev_span - } else { - label_sp - }; - match self.recover_closing_delimiter(&expected.iter().filter_map(|tt| match tt { - TokenType::Token(t) => Some(t.clone()), - _ => None, - }).collect::<Vec<_>>(), err) { - Err(e) => err = e, - Ok(recovered) => { - return Ok(recovered); - } - } - - let cm = self.sess.source_map(); - match (cm.lookup_line(self.span.lo()), cm.lookup_line(sp.lo())) { - (Ok(ref a), Ok(ref b)) if a.line == b.line => { - // When the spans are in the same line, it means that the only content between - // them is whitespace, point at the found token in that case: - // - // X | () => { syntax error }; - // | ^^^^^ expected one of 8 possible tokens here - // - // instead of having: - // - // X | () => { syntax error }; - // | -^^^^^ unexpected token - // | | - // | expected one of 8 possible tokens here - err.span_label(self.span, label_exp); - } - _ if self.prev_span == syntax_pos::DUMMY_SP => { - // Account for macro context where the previous span might not be - // available to avoid incorrect output (#54841). - err.span_label(self.span, "unexpected token"); - } - _ => { - err.span_label(sp, label_exp); - err.span_label(self.span, "unexpected token"); - } - } - Err(err) + self.expected_one_of_not_found(edible, inedible) } } /// Returns the span of expr, if it was not interpolated or the span of the interpolated token. - fn interpolated_or_expr_span(&self, - expr: PResult<'a, P<Expr>>) - -> PResult<'a, (Span, P<Expr>)> { + fn interpolated_or_expr_span( + &self, + expr: PResult<'a, P<Expr>>, + ) -> PResult<'a, (Span, P<Expr>)> { expr.map(|e| { if self.prev_token_kind == PrevTokenKind::Interpolated { (self.prev_span, e) @@ -890,45 +616,13 @@ impl<'a> Parser<'a> { }) } - fn expected_ident_found(&self) -> DiagnosticBuilder<'a> { - let mut err = self.struct_span_err(self.span, - &format!("expected identifier, found {}", - self.this_token_descr())); - if let token::Ident(ident, false) = &self.token { - if ident.is_reserved() && !ident.is_path_segment_keyword() && - ident.name != keywords::Underscore.name() - { - err.span_suggestion( - self.span, - "you can escape reserved keywords to use them as identifiers", - format!("r#{}", ident), - Applicability::MaybeIncorrect, - ); - } - } - if let Some(token_descr) = self.token_descr() { - err.span_label(self.span, format!("expected identifier, found {}", token_descr)); - } else { - err.span_label(self.span, "expected identifier"); - if self.token == token::Comma && self.look_ahead(1, |t| t.is_ident()) { - err.span_suggestion( - self.span, - "remove this comma", - String::new(), - Applicability::MachineApplicable, - ); - } - } - err - } - pub fn parse_ident(&mut self) -> PResult<'a, ast::Ident> { self.parse_ident_common(true) } fn parse_ident_common(&mut self, recover: bool) -> PResult<'a, ast::Ident> { - match self.token { - token::Ident(ident, _) => { + match self.token.kind { + token::Ident(name, _) => { if self.token.is_reserved_ident() { let mut err = self.expected_ident_found(); if recover { @@ -937,16 +631,16 @@ impl<'a> Parser<'a> { return Err(err); } } - let span = self.span; + let span = self.token.span; self.bump(); - Ok(Ident::new(ident.name, span)) + Ok(Ident::new(name, span)) } _ => { Err(if self.prev_token_kind == PrevTokenKind::DocComment { - self.span_fatal_err(self.prev_span, Error::UselessDocComment) - } else { - self.expected_ident_found() - }) + self.span_fatal_err(self.prev_span, Error::UselessDocComment) + } else { + self.expected_ident_found() + }) } } } @@ -955,27 +649,27 @@ impl<'a> Parser<'a> { /// /// This method will automatically add `tok` to `expected_tokens` if `tok` is not /// encountered. - crate fn check(&mut self, tok: &token::Token) -> bool { + crate fn check(&mut self, tok: &TokenKind) -> bool { let is_present = self.token == *tok; if !is_present { self.expected_tokens.push(TokenType::Token(tok.clone())); } is_present } /// Consumes a token 'tok' if it exists. Returns whether the given token was present. - pub fn eat(&mut self, tok: &token::Token) -> bool { + pub fn eat(&mut self, tok: &TokenKind) -> bool { let is_present = self.check(tok); if is_present { self.bump() } is_present } - fn check_keyword(&mut self, kw: keywords::Keyword) -> bool { + fn check_keyword(&mut self, kw: Symbol) -> bool { self.expected_tokens.push(TokenType::Keyword(kw)); self.token.is_keyword(kw) } /// If the next token is the given keyword, eats it and returns /// `true`. Otherwise, returns `false`. - pub fn eat_keyword(&mut self, kw: keywords::Keyword) -> bool { + pub fn eat_keyword(&mut self, kw: Symbol) -> bool { if self.check_keyword(kw) { self.bump(); true @@ -984,7 +678,7 @@ impl<'a> Parser<'a> { } } - fn eat_keyword_noexpect(&mut self, kw: keywords::Keyword) -> bool { + fn eat_keyword_noexpect(&mut self, kw: Symbol) -> bool { if self.token.is_keyword(kw) { self.bump(); true @@ -996,7 +690,7 @@ impl<'a> Parser<'a> { /// If the given word is not a keyword, signals an error. /// If the next token is not the given word, signals an error. /// Otherwise, eats it. - fn expect_keyword(&mut self, kw: keywords::Keyword) -> PResult<'a, ()> { + fn expect_keyword(&mut self, kw: Symbol) -> PResult<'a, ()> { if !self.eat_keyword(kw) { self.unexpected() } else { @@ -1004,7 +698,7 @@ impl<'a> Parser<'a> { } } - fn check_ident(&mut self) -> bool { + crate fn check_ident(&mut self) -> bool { if self.token.is_ident() { true } else { @@ -1047,13 +741,13 @@ impl<'a> Parser<'a> { /// See issue #47856 for an example of when this may occur. fn eat_plus(&mut self) -> bool { self.expected_tokens.push(TokenType::Token(token::BinOp(token::Plus))); - match self.token { + match self.token.kind { token::BinOp(token::Plus) => { self.bump(); true } token::BinOpEq(token::Plus) => { - let span = self.span.with_lo(self.span.lo() + BytePos(1)); + let span = self.token.span.with_lo(self.token.span.lo() + BytePos(1)); self.bump_with(token::Eq, span); true } @@ -1078,13 +772,13 @@ impl<'a> Parser<'a> { /// `&` and continues. If an `&` is not seen, signals an error. fn expect_and(&mut self) -> PResult<'a, ()> { self.expected_tokens.push(TokenType::Token(token::BinOp(token::And))); - match self.token { + match self.token.kind { token::BinOp(token::And) => { self.bump(); Ok(()) } token::AndAnd => { - let span = self.span.with_lo(self.span.lo() + BytePos(1)); + let span = self.token.span.with_lo(self.token.span.lo() + BytePos(1)); Ok(self.bump_with(token::BinOp(token::And), span)) } _ => self.unexpected() @@ -1095,13 +789,13 @@ impl<'a> Parser<'a> { /// `|` and continues. If an `|` is not seen, signals an error. fn expect_or(&mut self) -> PResult<'a, ()> { self.expected_tokens.push(TokenType::Token(token::BinOp(token::Or))); - match self.token { + match self.token.kind { token::BinOp(token::Or) => { self.bump(); Ok(()) } token::OrOr => { - let span = self.span.with_lo(self.span.lo() + BytePos(1)); + let span = self.token.span.with_lo(self.token.span.lo() + BytePos(1)); Ok(self.bump_with(token::BinOp(token::Or), span)) } _ => self.unexpected() @@ -1109,19 +803,7 @@ impl<'a> Parser<'a> { } fn expect_no_suffix(&self, sp: Span, kind: &str, suffix: Option<ast::Name>) { - match suffix { - None => {/* everything ok */} - Some(suf) => { - let text = suf.as_str(); - if text.is_empty() { - self.span_bug(sp, "found empty literal suffix in Some") - } - let msg = format!("{} with a suffix is invalid", kind); - self.struct_span_err(sp, &msg) - .span_label(sp, msg) - .emit(); - } - } + literal::expect_no_suffix(&self.sess.span_diagnostic, sp, kind, suffix) } /// Attempts to consume a `<`. If `<<` is seen, replaces it with a single @@ -1132,18 +814,18 @@ impl<'a> Parser<'a> { /// starting token. fn eat_lt(&mut self) -> bool { self.expected_tokens.push(TokenType::Token(token::Lt)); - let ate = match self.token { + let ate = match self.token.kind { token::Lt => { self.bump(); true } token::BinOp(token::Shl) => { - let span = self.span.with_lo(self.span.lo() + BytePos(1)); + let span = self.token.span.with_lo(self.token.span.lo() + BytePos(1)); self.bump_with(token::Lt, span); true } token::LArrow => { - let span = self.span.with_lo(self.span.lo() + BytePos(1)); + let span = self.token.span.with_lo(self.token.span.lo() + BytePos(1)); self.bump_with(token::BinOp(token::Minus), span); true } @@ -1172,21 +854,21 @@ impl<'a> Parser<'a> { /// with a single `>` and continues. If a `>` is not seen, signals an error. fn expect_gt(&mut self) -> PResult<'a, ()> { self.expected_tokens.push(TokenType::Token(token::Gt)); - let ate = match self.token { + let ate = match self.token.kind { token::Gt => { self.bump(); Some(()) } token::BinOp(token::Shr) => { - let span = self.span.with_lo(self.span.lo() + BytePos(1)); + let span = self.token.span.with_lo(self.token.span.lo() + BytePos(1)); Some(self.bump_with(token::Gt, span)) } token::BinOpEq(token::Shr) => { - let span = self.span.with_lo(self.span.lo() + BytePos(1)); + let span = self.token.span.with_lo(self.token.span.lo() + BytePos(1)); Some(self.bump_with(token::Ge, span)) } token::Ge => { - let span = self.span.with_lo(self.span.lo() + BytePos(1)); + let span = self.token.span.with_lo(self.token.span.lo() + BytePos(1)); Some(self.bump_with(token::Eq, span)) } _ => None, @@ -1206,30 +888,16 @@ impl<'a> Parser<'a> { } } - /// Eats and discards tokens until one of `kets` is encountered. Respects token trees, - /// passes through any errors encountered. Used for error recovery. - fn eat_to_tokens(&mut self, kets: &[&token::Token]) { - let handler = self.diagnostic(); - - if let Err(ref mut err) = self.parse_seq_to_before_tokens(kets, - SeqSep::none(), - TokenExpectType::Expect, - |p| Ok(p.parse_token_tree())) { - handler.cancel(err); - } - } - /// Parses a sequence, including the closing delimiter. The function /// `f` must consume tokens until reaching the next separator or /// closing bracket. - pub fn parse_seq_to_end<T, F>(&mut self, - ket: &token::Token, - sep: SeqSep, - f: F) - -> PResult<'a, Vec<T>> where - F: FnMut(&mut Parser<'a>) -> PResult<'a, T>, - { - let (val, recovered) = self.parse_seq_to_before_end(ket, sep, f)?; + pub fn parse_seq_to_end<T>( + &mut self, + ket: &TokenKind, + sep: SeqSep, + f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, Vec<T>> { + let (val, _, recovered) = self.parse_seq_to_before_end(ket, sep, f)?; if !recovered { self.bump(); } @@ -1239,39 +907,39 @@ impl<'a> Parser<'a> { /// Parses a sequence, not including the closing delimiter. The function /// `f` must consume tokens until reaching the next separator or /// closing bracket. - pub fn parse_seq_to_before_end<T, F>( + pub fn parse_seq_to_before_end<T>( &mut self, - ket: &token::Token, + ket: &TokenKind, sep: SeqSep, - f: F, - ) -> PResult<'a, (Vec<T>, bool)> - where F: FnMut(&mut Parser<'a>) -> PResult<'a, T> - { + f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool, bool)> { self.parse_seq_to_before_tokens(&[ket], sep, TokenExpectType::Expect, f) } - fn parse_seq_to_before_tokens<T, F>( + fn expect_any_with_type(&mut self, kets: &[&TokenKind], expect: TokenExpectType) -> bool { + kets.iter().any(|k| { + match expect { + TokenExpectType::Expect => self.check(k), + TokenExpectType::NoExpect => self.token == **k, + } + }) + } + + crate fn parse_seq_to_before_tokens<T>( &mut self, - kets: &[&token::Token], + kets: &[&TokenKind], sep: SeqSep, expect: TokenExpectType, - mut f: F, - ) -> PResult<'a, (Vec<T>, bool /* recovered */)> - where F: FnMut(&mut Parser<'a>) -> PResult<'a, T> - { + mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool /* trailing */, bool /* recovered */)> { let mut first = true; let mut recovered = false; + let mut trailing = false; let mut v = vec![]; - while !kets.iter().any(|k| { - match expect { - TokenExpectType::Expect => self.check(k), - TokenExpectType::NoExpect => self.token == **k, - } - }) { - match self.token { - token::CloseDelim(..) | token::Eof => break, - _ => {} - }; + while !self.expect_any_with_type(kets, expect) { + if let token::CloseDelim(..) | token::Eof = self.token.kind { + break + } if let Some(ref t) = sep.sep { if first { first = false; @@ -1285,7 +953,7 @@ impl<'a> Parser<'a> { Err(mut e) => { // Attempt to keep parsing if it was a similar separator if let Some(ref tokens) = t.similar_tokens() { - if tokens.contains(&self.token) { + if tokens.contains(&self.token.kind) { self.bump(); } } @@ -1305,12 +973,8 @@ impl<'a> Parser<'a> { } } } - if sep.trailing_sep_allowed && kets.iter().any(|k| { - match expect { - TokenExpectType::Expect => self.check(k), - TokenExpectType::NoExpect => self.token == **k, - } - }) { + if sep.trailing_sep_allowed && self.expect_any_with_type(kets, expect) { + trailing = true; break; } @@ -1318,27 +982,45 @@ impl<'a> Parser<'a> { v.push(t); } - Ok((v, recovered)) + Ok((v, trailing, recovered)) } /// Parses a sequence, including the closing delimiter. The function /// `f` must consume tokens until reaching the next separator or /// closing bracket. - fn parse_unspanned_seq<T, F>( + fn parse_unspanned_seq<T>( &mut self, - bra: &token::Token, - ket: &token::Token, + bra: &TokenKind, + ket: &TokenKind, sep: SeqSep, - f: F, - ) -> PResult<'a, Vec<T>> where - F: FnMut(&mut Parser<'a>) -> PResult<'a, T>, - { + f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool)> { self.expect(bra)?; - let (result, recovered) = self.parse_seq_to_before_end(ket, sep, f)?; + let (result, trailing, recovered) = self.parse_seq_to_before_end(ket, sep, f)?; if !recovered { self.eat(ket); } - Ok(result) + Ok((result, trailing)) + } + + fn parse_delim_comma_seq<T>( + &mut self, + delim: DelimToken, + f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool)> { + self.parse_unspanned_seq( + &token::OpenDelim(delim), + &token::CloseDelim(delim), + SeqSep::trailing_allowed(token::Comma), + f, + ) + } + + fn parse_paren_comma_seq<T>( + &mut self, + f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>, + ) -> PResult<'a, (Vec<T>, bool)> { + self.parse_delim_comma_seq(token::Paren, f) } /// Advance the parser by one token @@ -1348,22 +1030,21 @@ impl<'a> Parser<'a> { self.bug("attempted to bump the parser past EOF (may be stuck in a loop)"); } - self.prev_span = self.meta_var_span.take().unwrap_or(self.span); + self.prev_span = self.meta_var_span.take().unwrap_or(self.token.span); // Record last token kind for possible error recovery. - self.prev_token_kind = match self.token { + self.prev_token_kind = match self.token.kind { token::DocComment(..) => PrevTokenKind::DocComment, token::Comma => PrevTokenKind::Comma, token::BinOp(token::Plus) => PrevTokenKind::Plus, + token::BinOp(token::Or) => PrevTokenKind::BitOr, token::Interpolated(..) => PrevTokenKind::Interpolated, token::Eof => PrevTokenKind::Eof, token::Ident(..) => PrevTokenKind::Ident, _ => PrevTokenKind::Other, }; - let next = self.next_tok(); - self.span = next.sp; - self.token = next.tok; + self.token = self.next_tok(); self.expected_tokens.clear(); // check after each token self.process_potential_macro_variable(); @@ -1371,79 +1052,44 @@ impl<'a> Parser<'a> { /// Advance the parser using provided token as a next one. Use this when /// consuming a part of a token. For example a single `<` from `<<`. - fn bump_with(&mut self, next: token::Token, span: Span) { - self.prev_span = self.span.with_hi(span.lo()); + fn bump_with(&mut self, next: TokenKind, span: Span) { + self.prev_span = self.token.span.with_hi(span.lo()); // It would be incorrect to record the kind of the current token, but // fortunately for tokens currently using `bump_with`, the // prev_token_kind will be of no use anyway. self.prev_token_kind = PrevTokenKind::Other; - self.span = span; - self.token = next; + self.token = Token::new(next, span); self.expected_tokens.clear(); } pub fn look_ahead<R, F>(&self, dist: usize, f: F) -> R where - F: FnOnce(&token::Token) -> R, + F: FnOnce(&Token) -> R, { if dist == 0 { - return f(&self.token) + return f(&self.token); } - f(&match self.token_cursor.frame.tree_cursor.look_ahead(dist - 1) { + let frame = &self.token_cursor.frame; + f(&match frame.tree_cursor.look_ahead(dist - 1) { Some(tree) => match tree { - TokenTree::Token(_, tok) => tok, - TokenTree::Delimited(_, delim, _) => token::OpenDelim(delim), - }, - None => token::CloseDelim(self.token_cursor.frame.delim), + TokenTree::Token(token) => token, + TokenTree::Delimited(dspan, delim, _) => + Token::new(token::OpenDelim(delim), dspan.open), + } + None => Token::new(token::CloseDelim(frame.delim), frame.span.close) }) } - fn look_ahead_span(&self, dist: usize) -> Span { - if dist == 0 { - return self.span - } - - match self.token_cursor.frame.tree_cursor.look_ahead(dist - 1) { - Some(TokenTree::Token(span, _)) => span, - Some(TokenTree::Delimited(span, ..)) => span.entire(), - None => self.look_ahead_span(dist - 1), - } - } - pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> { - self.sess.span_diagnostic.struct_span_fatal(self.span, m) - } - pub fn span_fatal<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> DiagnosticBuilder<'a> { - self.sess.span_diagnostic.struct_span_fatal(sp, m) - } - fn span_fatal_err<S: Into<MultiSpan>>(&self, sp: S, err: Error) -> DiagnosticBuilder<'a> { - err.span_err(sp, self.diagnostic()) - } - fn bug(&self, m: &str) -> ! { - self.sess.span_diagnostic.span_bug(self.span, m) - } - fn span_err<S: Into<MultiSpan>>(&self, sp: S, m: &str) { - self.sess.span_diagnostic.span_err(sp, m) - } - fn struct_span_err<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> DiagnosticBuilder<'a> { - self.sess.span_diagnostic.struct_span_err(sp, m) - } - crate fn span_bug<S: Into<MultiSpan>>(&self, sp: S, m: &str) -> ! { - self.sess.span_diagnostic.span_bug(sp, m) - } - - fn cancel(&self, err: &mut DiagnosticBuilder<'_>) { - self.sess.span_diagnostic.cancel(err) - } - - crate fn diagnostic(&self) -> &'a errors::Handler { - &self.sess.span_diagnostic + /// Returns whether any of the given keywords are `dist` tokens ahead of the current one. + fn is_keyword_ahead(&self, dist: usize, kws: &[Symbol]) -> bool { + self.look_ahead(dist, |t| kws.iter().any(|&kw| t.is_keyword(kw))) } /// Is the current token one of the keywords that signals a bare function type? fn token_is_bare_fn_keyword(&mut self) -> bool { - self.check_keyword(keywords::Fn) || - self.check_keyword(keywords::Unsafe) || - self.check_keyword(keywords::Extern) + self.check_keyword(kw::Fn) || + self.check_keyword(kw::Unsafe) || + self.check_keyword(kw::Extern) } /// Parses a `TyKind::BareFn` type. @@ -1461,13 +1107,13 @@ impl<'a> Parser<'a> { */ let unsafety = self.parse_unsafety(); - let abi = if self.eat_keyword(keywords::Extern) { + let abi = if self.eat_keyword(kw::Extern) { self.parse_opt_abi()?.unwrap_or(Abi::C) } else { Abi::Rust }; - self.expect_keyword(keywords::Fn)?; + self.expect_keyword(kw::Fn)?; let (inputs, c_variadic) = self.parse_fn_args(false, true)?; let ret_ty = self.parse_ret_ty(false)?; let decl = P(FnDecl { @@ -1485,7 +1131,7 @@ impl<'a> Parser<'a> { /// Parses asyncness: `async` or nothing. fn parse_asyncness(&mut self) -> IsAsync { - if self.eat_keyword(keywords::Async) { + if self.eat_keyword(kw::Async) { IsAsync::Async { closure_id: ast::DUMMY_NODE_ID, return_impl_trait_id: ast::DUMMY_NODE_ID, @@ -1497,7 +1143,7 @@ impl<'a> Parser<'a> { /// Parses unsafety: `unsafe` or nothing. fn parse_unsafety(&mut self) -> Unsafety { - if self.eat_keyword(keywords::Unsafe) { + if self.eat_keyword(kw::Unsafe) { Unsafety::Unsafe } else { Unsafety::Normal @@ -1525,12 +1171,12 @@ impl<'a> Parser<'a> { fn parse_trait_item_(&mut self, at_end: &mut bool, mut attrs: Vec<Attribute>) -> PResult<'a, TraitItem> { - let lo = self.span; - - let (name, node, generics) = if self.eat_keyword(keywords::Type) { + let lo = self.token.span; + self.eat_bad_pub(); + let (name, node, generics) = if self.eat_keyword(kw::Type) { self.parse_trait_item_assoc_ty()? } else if self.is_const_item() { - self.expect_keyword(keywords::Const)?; + self.expect_keyword(kw::Const)?; let ident = self.parse_ident()?; self.expect(&token::Colon)?; let ty = self.parse_ty()?; @@ -1545,20 +1191,21 @@ impl<'a> Parser<'a> { (ident, TraitItemKind::Const(ty, default), ast::Generics::default()) } else if let Some(mac) = self.parse_assoc_macro_invoc("trait", None, &mut false)? { // trait item macro. - (keywords::Invalid.ident(), ast::TraitItemKind::Macro(mac), ast::Generics::default()) + (Ident::invalid(), ast::TraitItemKind::Macro(mac), ast::Generics::default()) } else { let (constness, unsafety, asyncness, abi) = self.parse_fn_front_matter()?; let ident = self.parse_ident()?; let mut generics = self.parse_generics()?; - let d = self.parse_fn_decl_with_self(|p: &mut Parser<'a>| { + let decl = self.parse_fn_decl_with_self(|p: &mut Parser<'a>| { // This is somewhat dubious; We don't want to allow // argument names to be left off if there is a // definition... // We don't allow argument names to be left off in edition 2018. - p.parse_arg_general(p.span.rust_2018(), true, false) + let is_name_required = p.token.span.rust_2018(); + p.parse_arg_general(true, false, |_| is_name_required) })?; generics.where_clause = self.parse_where_clause()?; @@ -1569,10 +1216,10 @@ impl<'a> Parser<'a> { abi, asyncness, }, - decl: d, + decl, }; - let body = match self.token { + let body = match self.token.kind { token::Semi => { self.bump(); *at_end = true; @@ -1595,20 +1242,12 @@ impl<'a> Parser<'a> { Some(body) } _ => { - let token_str = self.this_token_descr(); - let mut err = self.fatal(&format!("expected `;` or `{{`, found {}", - token_str)); - err.span_label(self.span, "expected `;` or `{`"); - return Err(err); + return self.expected_semi_or_open_brace(); } } } _ => { - let token_str = self.this_token_descr(); - let mut err = self.fatal(&format!("expected `;` or `{{`, found {}", - token_str)); - err.span_label(self.span, "expected `;` or `{`"); - return Err(err); + return self.expected_semi_or_open_brace(); } }; (ident, ast::TraitItemKind::Method(sig, body), generics) @@ -1630,7 +1269,7 @@ impl<'a> Parser<'a> { if self.eat(&token::RArrow) { Ok(FunctionRetTy::Ty(self.parse_ty_common(allow_plus, true, false)?)) } else { - Ok(FunctionRetTy::Default(self.span.shrink_to_lo())) + Ok(FunctionRetTy::Default(self.token.span.shrink_to_lo())) } } @@ -1651,9 +1290,10 @@ impl<'a> Parser<'a> { fn parse_ty_common(&mut self, allow_plus: bool, allow_qpath_recovery: bool, allow_c_variadic: bool) -> PResult<'a, P<Ty>> { + maybe_recover_from_interpolated_ty_qpath!(self, allow_qpath_recovery); maybe_whole!(self, NtTy, |x| x); - let lo = self.span; + let lo = self.token.span; let mut impl_dyn_multi = false; let node = if self.eat(&token::OpenDelim(token::Paren)) { // `(TYPE)` is a parenthesized type. @@ -1717,7 +1357,7 @@ impl<'a> Parser<'a> { // Reference self.expect_and()?; self.parse_borrowed_pointee()? - } else if self.eat_keyword_noexpect(keywords::Typeof) { + } else if self.eat_keyword_noexpect(kw::Typeof) { // `typeof(EXPR)` // In order to not be ambiguous, the type must be surrounded by parens. self.expect(&token::OpenDelim(token::Paren))?; @@ -1727,17 +1367,17 @@ impl<'a> Parser<'a> { }; self.expect(&token::CloseDelim(token::Paren))?; TyKind::Typeof(e) - } else if self.eat_keyword(keywords::Underscore) { + } else if self.eat_keyword(kw::Underscore) { // A type to be inferred `_` TyKind::Infer } else if self.token_is_bare_fn_keyword() { // Function pointer type self.parse_ty_bare_fn(Vec::new())? - } else if self.check_keyword(keywords::For) { + } else if self.check_keyword(kw::For) { // Function pointer type or bound list (trait object type) starting with a poly-trait. // `for<'lt> [unsafe] [extern "ABI"] fn (&'lt S) -> T` // `for<'lt> Trait1<'lt> + Trait2 + 'a` - let lo = self.span; + let lo = self.token.span; let lifetime_defs = self.parse_late_bound_lifetime_defs()?; if self.token_is_bare_fn_keyword() { self.parse_ty_bare_fn(lifetime_defs)? @@ -1746,13 +1386,13 @@ impl<'a> Parser<'a> { let parse_plus = allow_plus && self.check_plus(); self.parse_remaining_bounds(lifetime_defs, path, lo, parse_plus)? } - } else if self.eat_keyword(keywords::Impl) { + } else if self.eat_keyword(kw::Impl) { // Always parse bounds greedily for better error recovery. let bounds = self.parse_generic_bounds(None)?; impl_dyn_multi = bounds.len() > 1 || self.prev_token_kind == PrevTokenKind::Plus; TyKind::ImplTrait(ast::DUMMY_NODE_ID, bounds) - } else if self.check_keyword(keywords::Dyn) && - (self.span.rust_2018() || + } else if self.check_keyword(kw::Dyn) && + (self.token.span.rust_2018() || self.look_ahead(1, |t| t.can_begin_bound() && !can_continue_type_after_non_fn_ident(t))) { self.bump(); // `dyn` @@ -1775,7 +1415,12 @@ impl<'a> Parser<'a> { if self.eat(&token::Not) { // Macro invocation in type position let (delim, tts) = self.expect_delimited_token_tree()?; - let node = Mac_ { path, tts, delim }; + let node = Mac_ { + path, + tts, + delim, + prior_type_ascription: self.last_type_ascription, + }; TyKind::Mac(respan(lo.to(self.prev_span), node)) } else { // Just a type path or bound list (trait object type) starting with a trait. @@ -1798,18 +1443,19 @@ impl<'a> Parser<'a> { } } else { let msg = format!("expected type, found {}", self.this_token_descr()); - return Err(self.fatal(&msg)); + let mut err = self.fatal(&msg); + err.span_label(self.token.span, "expected type"); + self.maybe_annotate_with_ascription(&mut err, true); + return Err(err); }; let span = lo.to(self.prev_span); - let ty = Ty { node, span, id: ast::DUMMY_NODE_ID }; + let ty = P(Ty { node, span, id: ast::DUMMY_NODE_ID }); // Try to recover from use of `+` with incorrect priority. self.maybe_report_ambiguous_plus(allow_plus, impl_dyn_multi, &ty); self.maybe_recover_from_bad_type_plus(allow_plus, &ty)?; - let ty = self.maybe_recover_from_bad_qpath(ty, allow_qpath_recovery)?; - - Ok(P(ty)) + self.maybe_recover_from_bad_qpath(ty, allow_qpath_recovery) } fn parse_remaining_bounds(&mut self, generic_params: Vec<GenericParam>, path: ast::Path, @@ -1823,106 +1469,17 @@ impl<'a> Parser<'a> { Ok(TyKind::TraitObject(bounds, TraitObjectSyntax::None)) } - fn maybe_report_ambiguous_plus(&mut self, allow_plus: bool, impl_dyn_multi: bool, ty: &Ty) { - if !allow_plus && impl_dyn_multi { - let sum_with_parens = format!("({})", pprust::ty_to_string(&ty)); - self.struct_span_err(ty.span, "ambiguous `+` in a type") - .span_suggestion( - ty.span, - "use parentheses to disambiguate", - sum_with_parens, - Applicability::MachineApplicable - ).emit(); - } - } - - fn maybe_recover_from_bad_type_plus(&mut self, allow_plus: bool, ty: &Ty) -> PResult<'a, ()> { - // Do not add `+` to expected tokens. - if !allow_plus || !self.token.is_like_plus() { - return Ok(()) - } - - self.bump(); // `+` - let bounds = self.parse_generic_bounds(None)?; - let sum_span = ty.span.to(self.prev_span); - - let mut err = struct_span_err!(self.sess.span_diagnostic, sum_span, E0178, - "expected a path on the left-hand side of `+`, not `{}`", pprust::ty_to_string(ty)); - - match ty.node { - TyKind::Rptr(ref lifetime, ref mut_ty) => { - let sum_with_parens = pprust::to_string(|s| { - use crate::print::pprust::PrintState; - - s.s.word("&")?; - s.print_opt_lifetime(lifetime)?; - s.print_mutability(mut_ty.mutbl)?; - s.popen()?; - s.print_type(&mut_ty.ty)?; - s.print_type_bounds(" +", &bounds)?; - s.pclose() - }); - err.span_suggestion( - sum_span, - "try adding parentheses", - sum_with_parens, - Applicability::MachineApplicable - ); - } - TyKind::Ptr(..) | TyKind::BareFn(..) => { - err.span_label(sum_span, "perhaps you forgot parentheses?"); - } - _ => { - err.span_label(sum_span, "expected a path"); - }, - } - err.emit(); - Ok(()) - } - - // Try to recover from associated item paths like `[T]::AssocItem`/`(T, U)::AssocItem`. - fn maybe_recover_from_bad_qpath<T: RecoverQPath>(&mut self, base: T, allow_recovery: bool) - -> PResult<'a, T> { - // Do not add `::` to expected tokens. - if !allow_recovery || self.token != token::ModSep { - return Ok(base); - } - let ty = match base.to_ty() { - Some(ty) => ty, - None => return Ok(base), - }; - - self.bump(); // `::` - let mut segments = Vec::new(); - self.parse_path_segments(&mut segments, T::PATH_STYLE, true)?; - - let span = ty.span.to(self.prev_span); - let path_span = span.to(span); // use an empty path since `position` == 0 - let recovered = base.to_recovered( - Some(QSelf { ty, path_span, position: 0 }), - ast::Path { segments, span }, - ); - - self.diagnostic() - .struct_span_err(span, "missing angle brackets in associated item path") - .span_suggestion( // this is a best-effort recovery - span, "try", recovered.to_string(), Applicability::MaybeIncorrect - ).emit(); - - Ok(recovered) - } - fn parse_borrowed_pointee(&mut self) -> PResult<'a, TyKind> { let opt_lifetime = if self.check_lifetime() { Some(self.expect_lifetime()) } else { None }; let mutbl = self.parse_mutability(); let ty = self.parse_ty_no_plus()?; - return Ok(TyKind::Rptr(opt_lifetime, MutTy { ty: ty, mutbl: mutbl })); + return Ok(TyKind::Rptr(opt_lifetime, MutTy { ty, mutbl })); } fn parse_ptr(&mut self) -> PResult<'a, MutTy> { - let mutbl = if self.eat_keyword(keywords::Mut) { + let mutbl = if self.eat_keyword(kw::Mut) { Mutability::Mutable - } else if self.eat_keyword(keywords::Const) { + } else if self.eat_keyword(kw::Const) { Mutability::Immutable } else { let span = self.prev_span; @@ -1934,17 +1491,17 @@ impl<'a> Parser<'a> { Mutability::Immutable }; let t = self.parse_ty_no_plus()?; - Ok(MutTy { ty: t, mutbl: mutbl }) + Ok(MutTy { ty: t, mutbl }) } - fn is_named_argument(&mut self) -> bool { - let offset = match self.token { + fn is_named_argument(&self) -> bool { + let offset = match self.token.kind { token::Interpolated(ref nt) => match **nt { token::NtPat(..) => return self.look_ahead(1, |t| t == &token::Colon), _ => 0, } token::BinOp(token::And) | token::AndAnd => 1, - _ if self.token.is_keyword(keywords::Mut) => 1, + _ if self.token.is_keyword(kw::Mut) => 1, _ => 0, }; @@ -1954,90 +1511,48 @@ impl<'a> Parser<'a> { /// Skips unexpected attributes and doc comments in this position and emits an appropriate /// error. - fn eat_incorrect_doc_comment(&mut self, applied_to: &str) { - if let token::DocComment(_) = self.token { - let mut err = self.diagnostic().struct_span_err( - self.span, - &format!("documentation comments cannot be applied to {}", applied_to), - ); - err.span_label(self.span, "doc comments are not allowed here"); - err.emit(); - self.bump(); - } else if self.token == token::Pound && self.look_ahead(1, |t| { - *t == token::OpenDelim(token::Bracket) - }) { - let lo = self.span; - // Skip every token until next possible arg. - while self.token != token::CloseDelim(token::Bracket) { - self.bump(); - } - let sp = lo.to(self.span); - self.bump(); - let mut err = self.diagnostic().struct_span_err( - sp, - &format!("attributes cannot be applied to {}", applied_to), - ); - err.span_label(sp, "attributes are not allowed here"); - err.emit(); - } - } - /// This version of parse arg doesn't necessarily require identifier names. - fn parse_arg_general(&mut self, require_name: bool, is_trait_item: bool, - allow_c_variadic: bool) -> PResult<'a, Arg> { - maybe_whole!(self, NtArg, |x| x); - - if let Ok(Some(_)) = self.parse_self_arg() { - let mut err = self.struct_span_err(self.prev_span, - "unexpected `self` argument in function"); - err.span_label(self.prev_span, - "`self` is only valid as the first argument of an associated function"); - return Err(err); + fn parse_arg_general<F>( + &mut self, + is_trait_item: bool, + allow_c_variadic: bool, + is_name_required: F, + ) -> PResult<'a, Arg> + where + F: Fn(&token::Token) -> bool + { + let lo = self.token.span; + let attrs = self.parse_arg_attributes()?; + if let Some(mut arg) = self.parse_self_arg()? { + arg.attrs = attrs.into(); + return self.recover_bad_self_arg(arg, is_trait_item); } - let (pat, ty) = if require_name || self.is_named_argument() { - debug!("parse_arg_general parse_pat (require_name:{})", - require_name); - self.eat_incorrect_doc_comment("method arguments"); - let pat = self.parse_pat(Some("argument name"))?; + let is_name_required = is_name_required(&self.token); + let (pat, ty) = if is_name_required || self.is_named_argument() { + debug!("parse_arg_general parse_pat (is_name_required:{})", is_name_required); + let pat = self.parse_pat(Some("argument name"))?; if let Err(mut err) = self.expect(&token::Colon) { - // If we find a pattern followed by an identifier, it could be an (incorrect) - // C-style parameter declaration. - if self.check_ident() && self.look_ahead(1, |t| { - *t == token::Comma || *t == token::CloseDelim(token::Paren) - }) { - let ident = self.parse_ident().unwrap(); - let span = pat.span.with_hi(ident.span.hi()); - - err.span_suggestion( - span, - "declare the type after the parameter binding", - String::from("<identifier>: <type>"), - Applicability::HasPlaceholders, - ); - } else if require_name && is_trait_item { - if let PatKind::Ident(_, ident, _) = pat.node { - err.span_suggestion( - pat.span, - "explicitly ignore parameter", - format!("_: {}", ident), - Applicability::MachineApplicable, - ); - } - - err.note("anonymous parameters are removed in the 2018 edition (see RFC 1685)"); + if let Some(ident) = self.argument_without_type( + &mut err, + pat, + is_name_required, + is_trait_item, + ) { + err.emit(); + return Ok(dummy_arg(ident)); + } else { + return Err(err); } - - return Err(err); } - self.eat_incorrect_doc_comment("a method argument's type"); + self.eat_incorrect_doc_comment_for_arg_type(); (pat, self.parse_ty_common(true, true, allow_c_variadic)?) } else { debug!("parse_arg_general ident_to_pat"); let parser_snapshot_before_ty = self.clone(); - self.eat_incorrect_doc_comment("a method argument's type"); + self.eat_incorrect_doc_comment_for_arg_type(); let mut ty = self.parse_ty_common(true, true, allow_c_variadic); if ty.is_ok() && self.token != token::Comma && self.token != token::CloseDelim(token::Paren) { @@ -2047,7 +1562,7 @@ impl<'a> Parser<'a> { } match ty { Ok(ty) => { - let ident = Ident::new(keywords::Invalid.name(), self.prev_span); + let ident = Ident::new(kw::Invalid, self.prev_span); let pat = P(Pat { id: ast::DUMMY_NODE_ID, node: PatKind::Ident( @@ -2065,44 +1580,20 @@ impl<'a> Parser<'a> { // Recover from attempting to parse the argument as a type without pattern. err.cancel(); mem::replace(self, parser_snapshot_before_ty); - let pat = self.parse_pat(Some("argument name"))?; - self.expect(&token::Colon)?; - let ty = self.parse_ty()?; - - let mut err = self.diagnostic().struct_span_err_with_code( - pat.span, - "patterns aren't allowed in methods without bodies", - DiagnosticId::Error("E0642".into()), - ); - err.span_suggestion_short( - pat.span, - "give this argument a name or use an underscore to ignore it", - "_".to_owned(), - Applicability::MachineApplicable, - ); - err.emit(); - - // Pretend the pattern is `_`, to avoid duplicate errors from AST validation. - let pat = P(Pat { - node: PatKind::Wild, - span: pat.span, - id: ast::DUMMY_NODE_ID - }); - (pat, ty) + self.recover_arg_parse()? } } }; - Ok(Arg { ty, pat, id: ast::DUMMY_NODE_ID }) - } + let span = lo.to(self.token.span); - /// Parses a single function argument. - crate fn parse_arg(&mut self) -> PResult<'a, Arg> { - self.parse_arg_general(true, false, false) + Ok(Arg { attrs: attrs.into(), id: ast::DUMMY_NODE_ID, pat, span, ty }) } /// Parses an argument in a lambda header (e.g., `|arg, arg|`). fn parse_fn_block_arg(&mut self) -> PResult<'a, Arg> { + let lo = self.token.span; + let attrs = self.parse_arg_attributes()?; let pat = self.parse_pat(Some("argument name"))?; let t = if self.eat(&token::Colon) { self.parse_ty()? @@ -2113,9 +1604,12 @@ impl<'a> Parser<'a> { span: self.prev_span, }) }; + let span = lo.to(self.token.span); Ok(Arg { + attrs: attrs.into(), ty: t, pat, + span, id: ast::DUMMY_NODE_ID }) } @@ -2128,93 +1622,13 @@ impl<'a> Parser<'a> { } } - /// Matches `token_lit = LIT_INTEGER | ...`. - fn parse_lit_token(&mut self) -> PResult<'a, LitKind> { - let out = match self.token { - token::Interpolated(ref nt) => match **nt { - token::NtExpr(ref v) | token::NtLiteral(ref v) => match v.node { - ExprKind::Lit(ref lit) => { lit.node.clone() } - _ => { return self.unexpected_last(&self.token); } - }, - _ => { return self.unexpected_last(&self.token); } - }, - token::Literal(lit, suf) => { - let diag = Some((self.span, &self.sess.span_diagnostic)); - let (suffix_illegal, result) = parse::lit_token(lit, suf, diag); - - if suffix_illegal { - let sp = self.span; - self.expect_no_suffix(sp, lit.literal_name(), suf) - } - - result.unwrap() - } - token::Dot if self.look_ahead(1, |t| match t { - token::Literal(parse::token::Lit::Integer(_) , _) => true, - _ => false, - }) => { // recover from `let x = .4;` - let lo = self.span; - self.bump(); - if let token::Literal( - parse::token::Lit::Integer(val), - suffix, - ) = self.token { - let suffix = suffix.and_then(|s| { - let s = s.as_str().get(); - if ["f32", "f64"].contains(&s) { - Some(s) - } else { - None - } - }).unwrap_or(""); - self.bump(); - let sp = lo.to(self.prev_span); - let mut err = self.diagnostic() - .struct_span_err(sp, "float literals must have an integer part"); - err.span_suggestion( - sp, - "must have an integer part", - format!("0.{}{}", val, suffix), - Applicability::MachineApplicable, - ); - err.emit(); - return Ok(match suffix { - "f32" => ast::LitKind::Float(val, ast::FloatTy::F32), - "f64" => ast::LitKind::Float(val, ast::FloatTy::F64), - _ => ast::LitKind::FloatUnsuffixed(val), - }); - } else { - unreachable!(); - }; - } - _ => { return self.unexpected_last(&self.token); } - }; - - self.bump(); - Ok(out) - } - - /// Matches `lit = true | false | token_lit`. - crate fn parse_lit(&mut self) -> PResult<'a, Lit> { - let lo = self.span; - let lit = if self.eat_keyword(keywords::True) { - LitKind::Bool(true) - } else if self.eat_keyword(keywords::False) { - LitKind::Bool(false) - } else { - let lit = self.parse_lit_token()?; - lit - }; - Ok(source_map::Spanned { node: lit, span: lo.to(self.prev_span) }) - } - /// Matches `'-' lit | lit` (cf. `ast_validation::AstValidator::check_expr_within_pat`). crate fn parse_literal_maybe_minus(&mut self) -> PResult<'a, P<Expr>> { maybe_whole_expr!(self); - let minus_lo = self.span; + let minus_lo = self.token.span; let minus_present = self.eat(&token::BinOp(token::Minus)); - let lo = self.span; + let lo = self.token.span; let literal = self.parse_lit()?; let hi = self.prev_span; let expr = self.mk_expr(lo.to(hi), ExprKind::Lit(literal), ThinVec::new()); @@ -2229,22 +1643,22 @@ impl<'a> Parser<'a> { } fn parse_path_segment_ident(&mut self) -> PResult<'a, ast::Ident> { - match self.token { - token::Ident(ident, _) if self.token.is_path_segment_keyword() => { - let span = self.span; + match self.token.kind { + token::Ident(name, _) if name.is_path_segment_keyword() => { + let span = self.token.span; self.bump(); - Ok(Ident::new(ident.name, span)) + Ok(Ident::new(name, span)) } _ => self.parse_ident(), } } fn parse_ident_or_underscore(&mut self) -> PResult<'a, ast::Ident> { - match self.token { - token::Ident(ident, false) if ident.name == keywords::Underscore.name() => { - let span = self.span; + match self.token.kind { + token::Ident(name, false) if name == kw::Underscore => { + let span = self.token.span; self.bump(); - Ok(Ident::new(ident.name, span)) + Ok(Ident::new(name, span)) } _ => self.parse_ident(), } @@ -2269,13 +1683,13 @@ impl<'a> Parser<'a> { // above). `path_span` has the span of that path, or an empty // span in the case of something like `<T>::Bar`. let (mut path, path_span); - if self.eat_keyword(keywords::As) { - let path_lo = self.span; + if self.eat_keyword(kw::As) { + let path_lo = self.token.span; path = self.parse_path(PathStyle::Type)?; path_span = path_lo.to(self.prev_span); } else { - path = ast::Path { segments: Vec::new(), span: syntax_pos::DUMMY_SP }; - path_span = self.span.to(self.span); + path_span = self.token.span.to(self.token.span); + path = ast::Path { segments: Vec::new(), span: path_span }; } // See doc comment for `unmatched_angle_bracket_count`. @@ -2288,7 +1702,7 @@ impl<'a> Parser<'a> { self.expect(&token::ModSep)?; let qself = QSelf { ty, path_span, position: path.segments.len() }; - self.parse_path_segments(&mut path.segments, style, true)?; + self.parse_path_segments(&mut path.segments, style)?; Ok((qself, ast::Path { segments: path.segments, span: lo.to(self.prev_span) })) } @@ -2304,11 +1718,6 @@ impl<'a> Parser<'a> { /// `Fn(Args)` (without disambiguator) /// `Fn::(Args)` (with disambiguator) pub fn parse_path(&mut self, style: PathStyle) -> PResult<'a, ast::Path> { - self.parse_path_common(style, true) - } - - crate fn parse_path_common(&mut self, style: PathStyle, enable_warning: bool) - -> PResult<'a, ast::Path> { maybe_whole!(self, NtPath, |path| { if style == PathStyle::Mod && path.segments.iter().any(|segment| segment.args.is_some()) { @@ -2317,13 +1726,13 @@ impl<'a> Parser<'a> { path }); - let lo = self.meta_var_span.unwrap_or(self.span); + let lo = self.meta_var_span.unwrap_or(self.token.span); let mut segments = Vec::new(); - let mod_sep_ctxt = self.span.ctxt(); + let mod_sep_ctxt = self.token.span.ctxt(); if self.eat(&token::ModSep) { segments.push(PathSegment::path_root(lo.shrink_to_lo().with_ctxt(mod_sep_ctxt))); } - self.parse_path_segments(&mut segments, style, enable_warning)?; + self.parse_path_segments(&mut segments, style)?; Ok(ast::Path { segments, span: lo.to(self.prev_span) }) } @@ -2332,10 +1741,10 @@ impl<'a> Parser<'a> { /// backwards-compatibility. This is used when parsing derive macro paths in `#[derive]` /// attributes. pub fn parse_path_allowing_meta(&mut self, style: PathStyle) -> PResult<'a, ast::Path> { - let meta_ident = match self.token { + let meta_ident = match self.token.kind { token::Interpolated(ref nt) => match **nt { token::NtMeta(ref meta) => match meta.node { - ast::MetaItemKind::Word => Some(meta.ident.clone()), + ast::MetaItemKind::Word => Some(meta.path.clone()), _ => None, }, _ => None, @@ -2349,13 +1758,12 @@ impl<'a> Parser<'a> { self.parse_path(style) } - fn parse_path_segments(&mut self, + crate fn parse_path_segments(&mut self, segments: &mut Vec<PathSegment>, - style: PathStyle, - enable_warning: bool) + style: PathStyle) -> PResult<'a, ()> { loop { - let segment = self.parse_path_segment(style, enable_warning)?; + let segment = self.parse_path_segment(style)?; if style == PathStyle::Expr { // In order to check for trailing angle brackets, we must have finished // recursing (`parse_path_segment` can indirectly call this function), @@ -2383,12 +1791,12 @@ impl<'a> Parser<'a> { } } - fn parse_path_segment(&mut self, style: PathStyle, enable_warning: bool) - -> PResult<'a, PathSegment> { + fn parse_path_segment(&mut self, style: PathStyle) -> PResult<'a, PathSegment> { let ident = self.parse_path_segment_ident()?; - let is_args_start = |token: &token::Token| match *token { - token::Lt | token::BinOp(token::Shl) | token::OpenDelim(token::Paren) => true, + let is_args_start = |token: &Token| match token.kind { + token::Lt | token::BinOp(token::Shl) | token::OpenDelim(token::Paren) + | token::LArrow => true, _ => false, }; let check_args_start = |this: &mut Self| { @@ -2401,13 +1809,6 @@ impl<'a> Parser<'a> { Ok(if style == PathStyle::Type && check_args_start(self) || style != PathStyle::Mod && self.check(&token::ModSep) && self.look_ahead(1, |t| is_args_start(t)) { - // Generic arguments are found - `<`, `(`, `::<` or `::(`. - if self.eat(&token::ModSep) && style == PathStyle::Type && enable_warning { - self.diagnostic().struct_span_warn(self.prev_span, "unnecessary path disambiguator") - .span_label(self.prev_span, "try removing `::`").emit(); - } - let lo = self.span; - // We use `style == PathStyle::Expr` to check if this is in a recursion or not. If // it isn't, then we reset the unmatched angle bracket count as we're about to start // parsing a new path. @@ -2416,24 +1817,19 @@ impl<'a> Parser<'a> { self.max_angle_bracket_count = 0; } + // Generic arguments are found - `<`, `(`, `::<` or `::(`. + self.eat(&token::ModSep); + let lo = self.token.span; let args = if self.eat_lt() { // `<'a, T, A = U>` - let (args, bindings) = + let (args, constraints) = self.parse_generic_args_with_leaning_angle_bracket_recovery(style, lo)?; self.expect_gt()?; let span = lo.to(self.prev_span); - AngleBracketedArgs { args, bindings, span }.into() + AngleBracketedArgs { args, constraints, span }.into() } else { // `(T, U) -> R` - self.bump(); // `(` - let (inputs, recovered) = self.parse_seq_to_before_tokens( - &[&token::CloseDelim(token::Paren)], - SeqSep::trailing_allowed(token::Comma), - TokenExpectType::Expect, - |p| p.parse_ty())?; - if !recovered { - self.bump(); // `)` - } + let (inputs, _) = self.parse_paren_comma_seq(|p| p.parse_ty())?; let span = lo.to(self.prev_span); let output = if self.eat(&token::RArrow) { Some(self.parse_ty_common(false, false, false)?) @@ -2458,17 +1854,17 @@ impl<'a> Parser<'a> { /// Parses a single lifetime `'a` or panics. crate fn expect_lifetime(&mut self) -> Lifetime { if let Some(ident) = self.token.lifetime() { - let span = self.span; + let span = self.token.span; self.bump(); Lifetime { ident: Ident::new(ident.name, span), id: ast::DUMMY_NODE_ID } } else { - self.span_bug(self.span, "not a lifetime") + self.span_bug(self.token.span, "not a lifetime") } } fn eat_label(&mut self) -> Option<Label> { if let Some(ident) = self.token.lifetime() { - let span = self.span; + let span = self.token.span; self.bump(); Some(Label { ident: Ident::new(ident.name, span) }) } else { @@ -2478,7 +1874,7 @@ impl<'a> Parser<'a> { /// Parses mutability (`mut` or nothing). fn parse_mutability(&mut self) -> Mutability { - if self.eat_keyword(keywords::Mut) { + if self.eat_keyword(kw::Mut) { Mutability::Mutable } else { Mutability::Immutable @@ -2486,9 +1882,11 @@ impl<'a> Parser<'a> { } fn parse_field_name(&mut self) -> PResult<'a, Ident> { - if let token::Literal(token::Integer(name), None) = self.token { + if let token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) = + self.token.kind { + self.expect_no_suffix(self.token.span, "a tuple index", suffix); self.bump(); - Ok(Ident::new(name, self.prev_span)) + Ok(Ident::new(symbol, self.prev_span)) } else { self.parse_ident_common(false) } @@ -2497,7 +1895,7 @@ impl<'a> Parser<'a> { /// Parse ident (COLON expr)? fn parse_field(&mut self) -> PResult<'a, Field> { let attrs = self.parse_outer_attributes()?; - let lo = self.span; + let lo = self.token.span; // Check if a colon exists one ahead. This means we're parsing a fieldname. let (fieldname, expr, is_shorthand) = if self.look_ahead(1, |t| { @@ -2509,9 +1907,9 @@ impl<'a> Parser<'a> { // initialize a field with an eq rather than a colon. if self.token == token::Eq { self.diagnostic() - .struct_span_err(self.span, "expected `:`, found `=`") + .struct_span_err(self.token.span, "expected `:`, found `=`") .span_suggestion( - fieldname.span.shrink_to_hi().to(self.span), + fieldname.span.shrink_to_hi().to(self.token.span), "replace equals symbol with a colon", ":".to_string(), Applicability::MachineApplicable, @@ -2537,59 +1935,50 @@ impl<'a> Parser<'a> { }) } - fn mk_expr(&mut self, span: Span, node: ExprKind, attrs: ThinVec<Attribute>) -> P<Expr> { + crate fn mk_expr(&self, span: Span, node: ExprKind, attrs: ThinVec<Attribute>) -> P<Expr> { P(Expr { node, span, attrs, id: ast::DUMMY_NODE_ID }) } - fn mk_unary(&mut self, unop: ast::UnOp, expr: P<Expr>) -> ast::ExprKind { + fn mk_unary(&self, unop: ast::UnOp, expr: P<Expr>) -> ast::ExprKind { ExprKind::Unary(unop, expr) } - fn mk_binary(&mut self, binop: ast::BinOp, lhs: P<Expr>, rhs: P<Expr>) -> ast::ExprKind { + fn mk_binary(&self, binop: ast::BinOp, lhs: P<Expr>, rhs: P<Expr>) -> ast::ExprKind { ExprKind::Binary(binop, lhs, rhs) } - fn mk_call(&mut self, f: P<Expr>, args: Vec<P<Expr>>) -> ast::ExprKind { + fn mk_call(&self, f: P<Expr>, args: Vec<P<Expr>>) -> ast::ExprKind { ExprKind::Call(f, args) } - fn mk_index(&mut self, expr: P<Expr>, idx: P<Expr>) -> ast::ExprKind { + fn mk_index(&self, expr: P<Expr>, idx: P<Expr>) -> ast::ExprKind { ExprKind::Index(expr, idx) } - fn mk_range(&mut self, + fn mk_range(&self, start: Option<P<Expr>>, end: Option<P<Expr>>, limits: RangeLimits) -> PResult<'a, ast::ExprKind> { if end.is_none() && limits == RangeLimits::Closed { - Err(self.span_fatal_err(self.span, Error::InclusiveRangeWithNoEnd)) + Err(self.span_fatal_err(self.token.span, Error::InclusiveRangeWithNoEnd)) } else { Ok(ExprKind::Range(start, end, limits)) } } - fn mk_assign_op(&mut self, binop: ast::BinOp, + fn mk_assign_op(&self, binop: ast::BinOp, lhs: P<Expr>, rhs: P<Expr>) -> ast::ExprKind { ExprKind::AssignOp(binop, lhs, rhs) } - pub fn mk_mac_expr(&mut self, span: Span, m: Mac_, attrs: ThinVec<Attribute>) -> P<Expr> { - P(Expr { - id: ast::DUMMY_NODE_ID, - node: ExprKind::Mac(source_map::Spanned {node: m, span: span}), - span, - attrs, - }) - } - fn expect_delimited_token_tree(&mut self) -> PResult<'a, (MacDelimiter, TokenStream)> { - let delim = match self.token { + let delim = match self.token.kind { token::OpenDelim(delim) => delim, _ => { let msg = "expected open delimiter"; let mut err = self.fatal(msg); - err.span_label(self.span, msg); + err.span_label(self.token.span, msg); return Err(err) } }; @@ -2612,6 +2001,7 @@ impl<'a> Parser<'a> { /// N.B., this does not parse outer attributes, and is private because it only works /// correctly if called from `parse_dot_or_call_expr()`. fn parse_bottom_expr(&mut self) -> PResult<'a, P<Expr>> { + maybe_recover_from_interpolated_ty_qpath!(self, true); maybe_whole_expr!(self); // Outer attributes are already parsed and will be @@ -2621,13 +2011,34 @@ impl<'a> Parser<'a> { // attributes by giving them a empty "already parsed" list. let mut attrs = ThinVec::new(); - let lo = self.span; - let mut hi = self.span; + let lo = self.token.span; + let mut hi = self.token.span; let ex: ExprKind; - // Note: when adding new syntax here, don't forget to adjust Token::can_begin_expr(). - match self.token { + macro_rules! parse_lit { + () => { + match self.parse_lit() { + Ok(literal) => { + hi = self.prev_span; + ex = ExprKind::Lit(literal); + } + Err(mut err) => { + self.cancel(&mut err); + return Err(self.expected_expression_found()); + } + } + } + } + + // Note: when adding new syntax here, don't forget to adjust TokenKind::can_begin_expr(). + match self.token.kind { + // This match arm is a special-case of the `_` match arm below and + // could be removed without changing functionality, but it's faster + // to have it here, especially for programs with large constants. + token::Literal(_) => { + parse_lit!() + } token::OpenDelim(token::Paren) => { self.bump(); @@ -2639,7 +2050,13 @@ impl<'a> Parser<'a> { let mut trailing_comma = false; let mut recovered = false; while self.token != token::CloseDelim(token::Paren) { - es.push(self.parse_expr()?); + es.push(match self.parse_expr() { + Ok(es) => es, + Err(err) => { + // recover from parse error in tuple list + return Ok(self.recover_seq_parse_error(token::Paren, lo, Err(err))); + } + }); recovered = self.expect_one_of( &[], &[token::Comma, token::CloseDelim(token::Paren)], @@ -2711,38 +2128,30 @@ impl<'a> Parser<'a> { hi = path.span; return Ok(self.mk_expr(lo.to(hi), ExprKind::Path(Some(qself), path), attrs)); } - if self.span.rust_2018() && self.check_keyword(keywords::Async) - { - if self.is_async_block() { // check for `async {` and `async move {` - return self.parse_async_block(attrs); - } else { - return self.parse_lambda_expr(attrs); - } - } - if self.check_keyword(keywords::Move) || self.check_keyword(keywords::Static) { + if self.check_keyword(kw::Move) || self.check_keyword(kw::Static) { return self.parse_lambda_expr(attrs); } - if self.eat_keyword(keywords::If) { + if self.eat_keyword(kw::If) { return self.parse_if_expr(attrs); } - if self.eat_keyword(keywords::For) { + if self.eat_keyword(kw::For) { let lo = self.prev_span; return self.parse_for_expr(None, lo, attrs); } - if self.eat_keyword(keywords::While) { + if self.eat_keyword(kw::While) { let lo = self.prev_span; return self.parse_while_expr(None, lo, attrs); } if let Some(label) = self.eat_label() { let lo = label.ident.span; self.expect(&token::Colon)?; - if self.eat_keyword(keywords::While) { + if self.eat_keyword(kw::While) { return self.parse_while_expr(Some(label), lo, attrs) } - if self.eat_keyword(keywords::For) { + if self.eat_keyword(kw::For) { return self.parse_for_expr(Some(label), lo, attrs) } - if self.eat_keyword(keywords::Loop) { + if self.eat_keyword(kw::Loop) { return self.parse_loop_expr(Some(label), lo, attrs) } if self.token == token::OpenDelim(token::Brace) { @@ -2753,27 +2162,27 @@ impl<'a> Parser<'a> { } let msg = "expected `while`, `for`, `loop` or `{` after a label"; let mut err = self.fatal(msg); - err.span_label(self.span, msg); + err.span_label(self.token.span, msg); return Err(err); } - if self.eat_keyword(keywords::Loop) { + if self.eat_keyword(kw::Loop) { let lo = self.prev_span; return self.parse_loop_expr(None, lo, attrs); } - if self.eat_keyword(keywords::Continue) { + if self.eat_keyword(kw::Continue) { let label = self.eat_label(); let ex = ExprKind::Continue(label); let hi = self.prev_span; return Ok(self.mk_expr(lo.to(hi), ex, attrs)); } - if self.eat_keyword(keywords::Match) { + if self.eat_keyword(kw::Match) { let match_sp = self.prev_span; return self.parse_match_expr(attrs).map_err(|mut err| { err.span_label(match_sp, "while parsing this match expression"); err }); } - if self.eat_keyword(keywords::Unsafe) { + if self.eat_keyword(kw::Unsafe) { return self.parse_block_expr( None, lo, @@ -2786,11 +2195,21 @@ impl<'a> Parser<'a> { return Err(db); } if self.is_try_block() { - let lo = self.span; - assert!(self.eat_keyword(keywords::Try)); + let lo = self.token.span; + assert!(self.eat_keyword(kw::Try)); return self.parse_try_block(lo, attrs); } - if self.eat_keyword(keywords::Return) { + + // Span::rust_2018() is somewhat expensive; don't get it repeatedly. + let is_span_rust_2018 = self.token.span.rust_2018(); + if is_span_rust_2018 && self.check_keyword(kw::Async) { + return if self.is_async_block() { // check for `async {` and `async move {` + self.parse_async_block(attrs) + } else { + self.parse_lambda_expr(attrs) + }; + } + if self.eat_keyword(kw::Return) { if self.token.can_begin_expr() { let e = self.parse_expr()?; hi = e.span; @@ -2798,7 +2217,7 @@ impl<'a> Parser<'a> { } else { ex = ExprKind::Ret(None); } - } else if self.eat_keyword(keywords::Break) { + } else if self.eat_keyword(kw::Break) { let label = self.eat_label(); let e = if self.token.can_begin_expr() && !(self.token == token::OpenDelim(token::Brace) @@ -2810,7 +2229,7 @@ impl<'a> Parser<'a> { }; ex = ExprKind::Break(label, e); hi = self.prev_span; - } else if self.eat_keyword(keywords::Yield) { + } else if self.eat_keyword(kw::Yield) { if self.token.can_begin_expr() { let e = self.parse_expr()?; hi = e.span; @@ -2818,37 +2237,37 @@ impl<'a> Parser<'a> { } else { ex = ExprKind::Yield(None); } - } else if self.token.is_keyword(keywords::Let) { - // Catch this syntax error here, instead of in `parse_ident`, so - // that we can explicitly mention that let is not to be used as an expression - let mut db = self.fatal("expected expression, found statement (`let`)"); - db.span_label(self.span, "expected expression"); - db.note("variable declaration using `let` is a statement"); - return Err(db); + } else if self.eat_keyword(kw::Let) { + return self.parse_let_expr(attrs); + } else if is_span_rust_2018 && self.eat_keyword(kw::Await) { + let (await_hi, e_kind) = self.parse_incorrect_await_syntax(lo, self.prev_span)?; + hi = await_hi; + ex = e_kind; } else if self.token.is_path_start() { - let pth = self.parse_path(PathStyle::Expr)?; + let path = self.parse_path(PathStyle::Expr)?; // `!`, as an operator, is prefix, so we know this isn't that if self.eat(&token::Not) { // MACRO INVOCATION expression let (delim, tts) = self.expect_delimited_token_tree()?; - let hi = self.prev_span; - let node = Mac_ { path: pth, tts, delim }; - return Ok(self.mk_mac_expr(lo.to(hi), node, attrs)) - } - if self.check(&token::OpenDelim(token::Brace)) { - // This is a struct literal, unless we're prohibited - // from parsing struct literals here. - let prohibited = self.restrictions.contains( - Restrictions::NO_STRUCT_LITERAL - ); - if !prohibited { - return self.parse_struct_expr(lo, pth, attrs); + hi = self.prev_span; + ex = ExprKind::Mac(respan(lo.to(hi), Mac_ { + path, + tts, + delim, + prior_type_ascription: self.last_type_ascription, + })); + } else if self.check(&token::OpenDelim(token::Brace)) { + if let Some(expr) = self.maybe_parse_struct_expr(lo, &path, &attrs) { + return expr; + } else { + hi = path.span; + ex = ExprKind::Path(None, path); } + } else { + hi = path.span; + ex = ExprKind::Path(None, path); } - - hi = pth.span; - ex = ExprKind::Path(None, pth); } else { if !self.unclosed_delims.is_empty() && self.check(&token::Semi) { // Don't complain about bare semicolons after unclosed braces @@ -2863,30 +2282,56 @@ impl<'a> Parser<'a> { // | ^ expected expression // ``` self.bump(); - return Ok(self.mk_expr(self.span, ExprKind::Err, ThinVec::new())); - } - match self.parse_literal_maybe_minus() { - Ok(expr) => { - hi = expr.span; - ex = expr.node.clone(); - } - Err(mut err) => { - self.cancel(&mut err); - let msg = format!("expected expression, found {}", - self.this_token_descr()); - let mut err = self.fatal(&msg); - err.span_label(self.span, "expected expression"); - return Err(err); - } + return Ok(self.mk_expr(self.token.span, ExprKind::Err, ThinVec::new())); } + parse_lit!() } } } - let expr = Expr { node: ex, span: lo.to(hi), id: ast::DUMMY_NODE_ID, attrs }; - let expr = self.maybe_recover_from_bad_qpath(expr, true)?; + let expr = self.mk_expr(lo.to(hi), ex, attrs); + self.maybe_recover_from_bad_qpath(expr, true) + } - return Ok(P(expr)); + fn maybe_parse_struct_expr( + &mut self, + lo: Span, + path: &ast::Path, + attrs: &ThinVec<Attribute>, + ) -> Option<PResult<'a, P<Expr>>> { + let struct_allowed = !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL); + let certainly_not_a_block = || self.look_ahead(1, |t| t.is_ident()) && ( + // `{ ident, ` cannot start a block + self.look_ahead(2, |t| t == &token::Comma) || + self.look_ahead(2, |t| t == &token::Colon) && ( + // `{ ident: token, ` cannot start a block + self.look_ahead(4, |t| t == &token::Comma) || + // `{ ident: ` cannot start a block unless it's a type ascription `ident: Type` + self.look_ahead(3, |t| !t.can_begin_type()) + ) + ); + + if struct_allowed || certainly_not_a_block() { + // This is a struct literal, but we don't can't accept them here + let expr = self.parse_struct_expr(lo, path.clone(), attrs.clone()); + if let (Ok(expr), false) = (&expr, struct_allowed) { + self.struct_span_err( + expr.span, + "struct literals are not allowed here", + ) + .multipart_suggestion( + "surround the struct literal with parentheses", + vec![ + (lo.shrink_to_lo(), "(".to_string()), + (expr.span.shrink_to_hi(), ")".to_string()), + ], + Applicability::MachineApplicable, + ) + .emit(); + } + return Some(expr); + } + None } fn parse_struct_expr(&mut self, lo: Span, pth: ast::Path, mut attrs: ThinVec<Attribute>) @@ -2911,33 +2356,31 @@ impl<'a> Parser<'a> { } } if self.token == token::Comma { - let mut err = self.sess.span_diagnostic.mut_span_err( + self.struct_span_err( exp_span.to(self.prev_span), "cannot use a comma after the base struct", - ); - err.span_suggestion_short( - self.span, + ) + .span_suggestion_short( + self.token.span, "remove this comma", String::new(), Applicability::MachineApplicable - ); - err.note("the base struct must always be the last field"); - err.emit(); + ) + .note("the base struct must always be the last field") + .emit(); self.recover_stmt(); } break; } let mut recovery_field = None; - if let token::Ident(ident, _) = self.token { + if let token::Ident(name, _) = self.token.kind { if !self.token.is_reserved_ident() && self.look_ahead(1, |t| *t == token::Colon) { // Use in case of error after field-looking code: `S { foo: () with a }` - let mut ident = ident.clone(); - ident.span = self.span; recovery_field = Some(ast::Field { - ident, - span: self.span, - expr: self.mk_expr(self.span, ExprKind::Err, ThinVec::new()), + ident: Ident::new(name, self.token.span), + span: self.token.span, + expr: self.mk_expr(self.token.span, ExprKind::Err, ThinVec::new()), is_shorthand: false, attrs: ThinVec::new(), }); @@ -2980,7 +2423,7 @@ impl<'a> Parser<'a> { } } - let span = lo.to(self.span); + let span = lo.to(self.token.span); self.expect(&token::CloseDelim(token::Brace))?; return Ok(self.mk_expr(span, ExprKind::Struct(pth, fields, base), attrs)); } @@ -2996,10 +2439,13 @@ impl<'a> Parser<'a> { } /// Parses a block or unsafe block. - fn parse_block_expr(&mut self, opt_label: Option<Label>, - lo: Span, blk_mode: BlockCheckMode, - outer_attrs: ThinVec<Attribute>) - -> PResult<'a, P<Expr>> { + crate fn parse_block_expr( + &mut self, + opt_label: Option<Label>, + lo: Span, + blk_mode: BlockCheckMode, + outer_attrs: ThinVec<Attribute>, + ) -> PResult<'a, P<Expr>> { self.expect(&token::OpenDelim(token::Brace))?; let mut attrs = outer_attrs; @@ -3034,15 +2480,13 @@ impl<'a> Parser<'a> { attrs.extend::<Vec<_>>(expr.attrs.into()); expr.attrs = attrs; match expr.node { - ExprKind::If(..) | ExprKind::IfLet(..) => { - if !expr.attrs.is_empty() { - // Just point to the first attribute in there... - let span = expr.attrs[0].span; - - self.span_err(span, - "attributes are not yet allowed on `if` \ - expressions"); - } + ExprKind::If(..) if !expr.attrs.is_empty() => { + // Just point to the first attribute in there... + let span = expr.attrs[0].span; + + self.span_err(span, + "attributes are not yet allowed on `if` \ + expressions"); } _ => {} } @@ -3051,20 +2495,26 @@ impl<'a> Parser<'a> { ) } - // Assuming we have just parsed `.`, continue parsing into an expression. + fn mk_await_expr(&mut self, self_arg: P<Expr>, lo: Span) -> PResult<'a, P<Expr>> { + let span = lo.to(self.prev_span); + let await_expr = self.mk_expr(span, ExprKind::Await(self_arg), ThinVec::new()); + self.recover_from_await_method_call(); + Ok(await_expr) + } + + /// Assuming we have just parsed `.`, continue parsing into an expression. fn parse_dot_suffix(&mut self, self_arg: P<Expr>, lo: Span) -> PResult<'a, P<Expr>> { - let segment = self.parse_path_segment(PathStyle::Expr, true)?; + if self.token.span.rust_2018() && self.eat_keyword(kw::Await) { + return self.mk_await_expr(self_arg, lo); + } + + let segment = self.parse_path_segment(PathStyle::Expr)?; self.check_trailing_angle_brackets(&segment, token::OpenDelim(token::Paren)); - Ok(match self.token { + Ok(match self.token.kind { token::OpenDelim(token::Paren) => { // Method call `expr.f()` - let mut args = self.parse_unspanned_seq( - &token::OpenDelim(token::Paren), - &token::CloseDelim(token::Paren), - SeqSep::trailing_allowed(token::Comma), - |p| Ok(p.parse_expr()?) - )?; + let mut args = self.parse_paren_expr_seq()?; args.insert(0, self_arg); let span = lo.to(self.prev_span); @@ -3083,116 +2533,6 @@ impl<'a> Parser<'a> { }) } - /// This function checks if there are trailing angle brackets and produces - /// a diagnostic to suggest removing them. - /// - /// ```ignore (diagnostic) - /// let _ = vec![1, 2, 3].into_iter().collect::<Vec<usize>>>>(); - /// ^^ help: remove extra angle brackets - /// ``` - fn check_trailing_angle_brackets(&mut self, segment: &PathSegment, end: token::Token) { - // This function is intended to be invoked after parsing a path segment where there are two - // cases: - // - // 1. A specific token is expected after the path segment. - // eg. `x.foo(`, `x.foo::<u32>(` (parenthesis - method call), - // `Foo::`, or `Foo::<Bar>::` (mod sep - continued path). - // 2. No specific token is expected after the path segment. - // eg. `x.foo` (field access) - // - // This function is called after parsing `.foo` and before parsing the token `end` (if - // present). This includes any angle bracket arguments, such as `.foo::<u32>` or - // `Foo::<Bar>`. - - // We only care about trailing angle brackets if we previously parsed angle bracket - // arguments. This helps stop us incorrectly suggesting that extra angle brackets be - // removed in this case: - // - // `x.foo >> (3)` (where `x.foo` is a `u32` for example) - // - // This case is particularly tricky as we won't notice it just looking at the tokens - - // it will appear the same (in terms of upcoming tokens) as below (since the `::<u32>` will - // have already been parsed): - // - // `x.foo::<u32>>>(3)` - let parsed_angle_bracket_args = segment.args - .as_ref() - .map(|args| args.is_angle_bracketed()) - .unwrap_or(false); - - debug!( - "check_trailing_angle_brackets: parsed_angle_bracket_args={:?}", - parsed_angle_bracket_args, - ); - if !parsed_angle_bracket_args { - return; - } - - // Keep the span at the start so we can highlight the sequence of `>` characters to be - // removed. - let lo = self.span; - - // We need to look-ahead to see if we have `>` characters without moving the cursor forward - // (since we might have the field access case and the characters we're eating are - // actual operators and not trailing characters - ie `x.foo >> 3`). - let mut position = 0; - - // We can encounter `>` or `>>` tokens in any order, so we need to keep track of how - // many of each (so we can correctly pluralize our error messages) and continue to - // advance. - let mut number_of_shr = 0; - let mut number_of_gt = 0; - while self.look_ahead(position, |t| { - trace!("check_trailing_angle_brackets: t={:?}", t); - if *t == token::BinOp(token::BinOpToken::Shr) { - number_of_shr += 1; - true - } else if *t == token::Gt { - number_of_gt += 1; - true - } else { - false - } - }) { - position += 1; - } - - // If we didn't find any trailing `>` characters, then we have nothing to error about. - debug!( - "check_trailing_angle_brackets: number_of_gt={:?} number_of_shr={:?}", - number_of_gt, number_of_shr, - ); - if number_of_gt < 1 && number_of_shr < 1 { - return; - } - - // Finally, double check that we have our end token as otherwise this is the - // second case. - if self.look_ahead(position, |t| { - trace!("check_trailing_angle_brackets: t={:?}", t); - *t == end - }) { - // Eat from where we started until the end token so that parsing can continue - // as if we didn't have those extra angle brackets. - self.eat_to_tokens(&[&end]); - let span = lo.until(self.span); - - let plural = number_of_gt > 1 || number_of_shr >= 1; - self.diagnostic() - .struct_span_err( - span, - &format!("unmatched angle bracket{}", if plural { "s" } else { "" }), - ) - .span_suggestion( - span, - &format!("remove extra angle bracket{}", if plural { "s" } else { "" }), - String::new(), - Applicability::MachineApplicable, - ) - .emit(); - } - } - fn parse_dot_or_call_expr_with_(&mut self, e0: P<Expr>, lo: Span) -> PResult<'a, P<Expr>> { let mut e = e0; let mut hi; @@ -3205,125 +2545,128 @@ impl<'a> Parser<'a> { // expr.f if self.eat(&token::Dot) { - match self.token { - token::Ident(..) => { - e = self.parse_dot_suffix(e, lo)?; - } - token::Literal(token::Integer(name), _) => { - let span = self.span; - self.bump(); - let field = ExprKind::Field(e, Ident::new(name, span)); - e = self.mk_expr(lo.to(span), field, ThinVec::new()); - } - token::Literal(token::Float(n), _suf) => { - self.bump(); - let fstr = n.as_str(); - let mut err = self.diagnostic() - .struct_span_err(self.prev_span, &format!("unexpected token: `{}`", n)); - err.span_label(self.prev_span, "unexpected token"); - if fstr.chars().all(|x| "0123456789.".contains(x)) { - let float = match fstr.parse::<f64>().ok() { - Some(f) => f, - None => continue, - }; - let sugg = pprust::to_string(|s| { - use crate::print::pprust::PrintState; - s.popen()?; - s.print_expr(&e)?; - s.s.word( ".")?; - s.print_usize(float.trunc() as usize)?; - s.pclose()?; - s.s.word(".")?; - s.s.word(fstr.splitn(2, ".").last().unwrap().to_string()) - }); - err.span_suggestion( - lo.to(self.prev_span), - "try parenthesizing the first index", - sugg, - Applicability::MachineApplicable - ); + match self.token.kind { + token::Ident(..) => { + e = self.parse_dot_suffix(e, lo)?; } - return Err(err); + token::Literal(token::Lit { kind: token::Integer, symbol, suffix }) => { + let span = self.token.span; + self.bump(); + let field = ExprKind::Field(e, Ident::new(symbol, span)); + e = self.mk_expr(lo.to(span), field, ThinVec::new()); + + self.expect_no_suffix(span, "a tuple index", suffix); + } + token::Literal(token::Lit { kind: token::Float, symbol, .. }) => { + self.bump(); + let fstr = symbol.as_str(); + let msg = format!("unexpected token: `{}`", symbol); + let mut err = self.diagnostic().struct_span_err(self.prev_span, &msg); + err.span_label(self.prev_span, "unexpected token"); + if fstr.chars().all(|x| "0123456789.".contains(x)) { + let float = match fstr.parse::<f64>().ok() { + Some(f) => f, + None => continue, + }; + let sugg = pprust::to_string(|s| { + s.popen(); + s.print_expr(&e); + s.s.word( "."); + s.print_usize(float.trunc() as usize); + s.pclose(); + s.s.word("."); + s.s.word(fstr.splitn(2, ".").last().unwrap().to_string()) + }); + err.span_suggestion( + lo.to(self.prev_span), + "try parenthesizing the first index", + sugg, + Applicability::MachineApplicable + ); + } + return Err(err); - } - _ => { - // FIXME Could factor this out into non_fatal_unexpected or something. - let actual = self.this_token_to_string(); - self.span_err(self.span, &format!("unexpected token: `{}`", actual)); - } + } + _ => { + // FIXME Could factor this out into non_fatal_unexpected or something. + let actual = self.this_token_to_string(); + self.span_err(self.token.span, &format!("unexpected token: `{}`", actual)); + } } continue; } if self.expr_is_complete(&e) { break; } - match self.token { - // expr(...) - token::OpenDelim(token::Paren) => { - let es = self.parse_unspanned_seq( - &token::OpenDelim(token::Paren), - &token::CloseDelim(token::Paren), - SeqSep::trailing_allowed(token::Comma), - |p| Ok(p.parse_expr()?) - )?; - hi = self.prev_span; - - let nd = self.mk_call(e, es); - e = self.mk_expr(lo.to(hi), nd, ThinVec::new()); - } + match self.token.kind { + // expr(...) + token::OpenDelim(token::Paren) => { + let seq = self.parse_paren_expr_seq().map(|es| { + let nd = self.mk_call(e, es); + let hi = self.prev_span; + self.mk_expr(lo.to(hi), nd, ThinVec::new()) + }); + e = self.recover_seq_parse_error(token::Paren, lo, seq); + } - // expr[...] - // Could be either an index expression or a slicing expression. - token::OpenDelim(token::Bracket) => { - self.bump(); - let ix = self.parse_expr()?; - hi = self.span; - self.expect(&token::CloseDelim(token::Bracket))?; - let index = self.mk_index(e, ix); - e = self.mk_expr(lo.to(hi), index, ThinVec::new()) - } - _ => return Ok(e) + // expr[...] + // Could be either an index expression or a slicing expression. + token::OpenDelim(token::Bracket) => { + self.bump(); + let ix = self.parse_expr()?; + hi = self.token.span; + self.expect(&token::CloseDelim(token::Bracket))?; + let index = self.mk_index(e, ix); + e = self.mk_expr(lo.to(hi), index, ThinVec::new()) + } + _ => return Ok(e) } } return Ok(e); } + fn parse_paren_expr_seq(&mut self) -> PResult<'a, Vec<P<Expr>>> { + self.parse_paren_comma_seq(|p| p.parse_expr()).map(|(r, _)| r) + } + crate fn process_potential_macro_variable(&mut self) { - let (token, span) = match self.token { - token::Dollar if self.span.ctxt() != syntax_pos::hygiene::SyntaxContext::empty() && + self.token = match self.token.kind { + token::Dollar if self.token.span.ctxt() != SyntaxContext::empty() && self.look_ahead(1, |t| t.is_ident()) => { self.bump(); - let name = match self.token { - token::Ident(ident, _) => ident, + let name = match self.token.kind { + token::Ident(name, _) => name, _ => unreachable!() }; - let mut err = self.fatal(&format!("unknown macro variable `{}`", name)); - err.span_label(self.span, "unknown macro variable"); - err.emit(); + let span = self.prev_span.to(self.token.span); + self.diagnostic() + .struct_span_fatal(span, &format!("unknown macro variable `{}`", name)) + .span_label(span, "unknown macro variable") + .emit(); self.bump(); return } token::Interpolated(ref nt) => { - self.meta_var_span = Some(self.span); + self.meta_var_span = Some(self.token.span); // Interpolated identifier and lifetime tokens are replaced with usual identifier // and lifetime tokens, so the former are never encountered during normal parsing. match **nt { - token::NtIdent(ident, is_raw) => (token::Ident(ident, is_raw), ident.span), - token::NtLifetime(ident) => (token::Lifetime(ident), ident.span), + token::NtIdent(ident, is_raw) => + Token::new(token::Ident(ident.name, is_raw), ident.span), + token::NtLifetime(ident) => + Token::new(token::Lifetime(ident.name), ident.span), _ => return, } } _ => return, }; - self.token = token; - self.span = span; } /// Parses a single token tree from the input. crate fn parse_token_tree(&mut self) -> TokenTree { - match self.token { + match self.token.kind { token::OpenDelim(..) => { let frame = mem::replace(&mut self.token_cursor.frame, self.token_cursor.stack.pop().unwrap()); - self.span = frame.span.entire(); + self.token.span = frame.span.entire(); self.bump(); TokenTree::Delimited( frame.span, @@ -3333,15 +2676,14 @@ impl<'a> Parser<'a> { }, token::CloseDelim(_) | token::Eof => unreachable!(), _ => { - let (token, span) = (mem::replace(&mut self.token, token::Whitespace), self.span); + let token = self.token.take(); self.bump(); - TokenTree::Token(span, token) + TokenTree::Token(token) } } } - // parse a stream of tokens into a list of TokenTree's, - // up to EOF. + /// Parses a stream of tokens into a list of `TokenTree`s, up to EOF. pub fn parse_all_token_trees(&mut self) -> PResult<'a, Vec<TokenTree>> { let mut tts = Vec::new(); while self.token != token::Eof { @@ -3353,7 +2695,7 @@ impl<'a> Parser<'a> { pub fn parse_tokens(&mut self) -> TokenStream { let mut result = Vec::new(); loop { - match self.token { + match self.token.kind { token::Eof | token::CloseDelim(..) => break, _ => result.push(self.parse_token_tree().into()), } @@ -3366,9 +2708,9 @@ impl<'a> Parser<'a> { already_parsed_attrs: Option<ThinVec<Attribute>>) -> PResult<'a, P<Expr>> { let attrs = self.parse_or_use_outer_attributes(already_parsed_attrs)?; - let lo = self.span; - // Note: when adding new unary operators, don't forget to adjust Token::can_begin_expr() - let (hi, ex) = match self.token { + let lo = self.token.span; + // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr() + let (hi, ex) = match self.token.kind { token::Not => { self.bump(); let e = self.parse_prefix_expr(None); @@ -3381,15 +2723,14 @@ impl<'a> Parser<'a> { let e = self.parse_prefix_expr(None); let (span, e) = self.interpolated_or_expr_span(e)?; let span_of_tilde = lo; - let mut err = self.diagnostic() - .struct_span_err(span_of_tilde, "`~` cannot be used as a unary operator"); - err.span_suggestion_short( - span_of_tilde, - "use `!` to perform bitwise negation", - "!".to_owned(), - Applicability::MachineApplicable - ); - err.emit(); + self.struct_span_err(span_of_tilde, "`~` cannot be used as a unary operator") + .span_suggestion_short( + span_of_tilde, + "use `!` to perform bitwise negation", + "!".to_owned(), + Applicability::MachineApplicable + ) + .emit(); (lo.to(span), self.mk_unary(UnOp::Not, e)) } token::BinOp(token::Minus) => { @@ -3411,58 +2752,41 @@ impl<'a> Parser<'a> { let (span, e) = self.interpolated_or_expr_span(e)?; (lo.to(span), ExprKind::AddrOf(m, e)) } - token::Ident(..) if self.token.is_keyword(keywords::In) => { - self.bump(); - let place = self.parse_expr_res( - Restrictions::NO_STRUCT_LITERAL, - None, - )?; - let blk = self.parse_block()?; - let span = blk.span; - let blk_expr = self.mk_expr(span, ExprKind::Block(blk, None), ThinVec::new()); - (lo.to(span), ExprKind::ObsoleteInPlace(place, blk_expr)) - } - token::Ident(..) if self.token.is_keyword(keywords::Box) => { + token::Ident(..) if self.token.is_keyword(kw::Box) => { self.bump(); let e = self.parse_prefix_expr(None); let (span, e) = self.interpolated_or_expr_span(e)?; (lo.to(span), ExprKind::Box(e)) } - token::Ident(..) if self.token.is_ident_named("not") => { + token::Ident(..) if self.token.is_ident_named(sym::not) => { // `not` is just an ordinary identifier in Rust-the-language, // but as `rustc`-the-compiler, we can issue clever diagnostics // for confused users who really want to say `!` - let token_cannot_continue_expr = |t: &token::Token| match *t { + let token_cannot_continue_expr = |t: &Token| match t.kind { // These tokens can start an expression after `!`, but // can't continue an expression after an ident - token::Ident(ident, is_raw) => token::ident_can_begin_expr(ident, is_raw), + token::Ident(name, is_raw) => token::ident_can_begin_expr(name, t.span, is_raw), token::Literal(..) | token::Pound => true, - token::Interpolated(ref nt) => match **nt { - token::NtIdent(..) | token::NtExpr(..) | - token::NtBlock(..) | token::NtPath(..) => true, - _ => false, - }, - _ => false + _ => t.is_whole_expr(), }; let cannot_continue_expr = self.look_ahead(1, token_cannot_continue_expr); if cannot_continue_expr { self.bump(); // Emit the error ... - let mut err = self.diagnostic() - .struct_span_err(self.span, - &format!("unexpected {} after identifier", - self.this_token_descr())); - // span the `not` plus trailing whitespace to avoid - // trailing whitespace after the `!` in our suggestion - let to_replace = self.sess.source_map() - .span_until_non_whitespace(lo.to(self.span)); - err.span_suggestion_short( - to_replace, + self.struct_span_err( + self.token.span, + &format!("unexpected {} after identifier",self.this_token_descr()) + ) + .span_suggestion_short( + // Span the `not` plus trailing whitespace to avoid + // trailing whitespace after the `!` in our suggestion + self.sess.source_map() + .span_until_non_whitespace(lo.to(self.token.span)), "use `!` to perform logical negation", "!".to_owned(), Applicability::MachineApplicable - ); - err.emit(); + ) + .emit(); // —and recover! (just as if we were in the block // for the `token::Not` arm) let e = self.parse_prefix_expr(None); @@ -3489,10 +2813,11 @@ impl<'a> Parser<'a> { } /// Parses an associative expression with operators of at least `min_prec` precedence. - fn parse_assoc_expr_with(&mut self, - min_prec: usize, - lhs: LhsExpr) - -> PResult<'a, P<Expr>> { + fn parse_assoc_expr_with( + &mut self, + min_prec: usize, + lhs: LhsExpr, + ) -> PResult<'a, P<Expr>> { let mut lhs = if let LhsExpr::AlreadyParsed(expr) = lhs { expr } else { @@ -3500,16 +2825,57 @@ impl<'a> Parser<'a> { LhsExpr::AttributesParsed(attrs) => Some(attrs), _ => None, }; - if [token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token) { + if [token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token.kind) { return self.parse_prefix_range_expr(attrs); } else { self.parse_prefix_expr(attrs)? } }; - - if self.expr_is_complete(&lhs) { - // Semi-statement forms are odd. See https://github.com/rust-lang/rust/issues/29071 - return Ok(lhs); + let last_type_ascription_set = self.last_type_ascription.is_some(); + + match (self.expr_is_complete(&lhs), AssocOp::from_token(&self.token)) { + (true, None) => { + self.last_type_ascription = None; + // Semi-statement forms are odd. See https://github.com/rust-lang/rust/issues/29071 + return Ok(lhs); + } + (false, _) => {} // continue parsing the expression + // An exhaustive check is done in the following block, but these are checked first + // because they *are* ambiguous but also reasonable looking incorrect syntax, so we + // want to keep their span info to improve diagnostics in these cases in a later stage. + (true, Some(AssocOp::Multiply)) | // `{ 42 } *foo = bar;` or `{ 42 } * 3` + (true, Some(AssocOp::Subtract)) | // `{ 42 } -5` + (true, Some(AssocOp::LAnd)) | // `{ 42 } &&x` (#61475) + (true, Some(AssocOp::Add)) // `{ 42 } + 42 + // If the next token is a keyword, then the tokens above *are* unambiguously incorrect: + // `if x { a } else { b } && if y { c } else { d }` + if !self.look_ahead(1, |t| t.is_reserved_ident()) => { + self.last_type_ascription = None; + // These cases are ambiguous and can't be identified in the parser alone + let sp = self.sess.source_map().start_point(self.token.span); + self.sess.ambiguous_block_expr_parse.borrow_mut().insert(sp, lhs.span); + return Ok(lhs); + } + (true, Some(ref op)) if !op.can_continue_expr_unambiguously() => { + self.last_type_ascription = None; + return Ok(lhs); + } + (true, Some(_)) => { + // We've found an expression that would be parsed as a statement, but the next + // token implies this should be parsed as an expression. + // For example: `if let Some(x) = x { x } else { 0 } / 2` + let mut err = self.struct_span_err(self.token.span, &format!( + "expected expression, found `{}`", + pprust::token_to_string(&self.token), + )); + err.span_label(self.token.span, "expected expression"); + self.sess.expr_parentheses_needed( + &mut err, + lhs.span, + Some(pprust::expr_to_string(&lhs), + )); + err.emit(); + } } self.expected_tokens.push(TokenType::Operator); while let Some(op) = AssocOp::from_token(&self.token) { @@ -3525,18 +2891,19 @@ impl<'a> Parser<'a> { _ => lhs.span, }; - let cur_op_span = self.span; + let cur_op_span = self.token.span; let restrictions = if op.is_assign_like() { self.restrictions & Restrictions::NO_STRUCT_LITERAL } else { self.restrictions }; - if op.precedence() < min_prec { + let prec = op.precedence(); + if prec < min_prec { break; } // Check for deprecated `...` syntax if self.token == token::DotDotDot && op == AssocOp::DotDotEq { - self.err_dotdotdot_syntax(self.span); + self.err_dotdotdot_syntax(self.token.span); } self.bump(); @@ -3548,25 +2915,10 @@ impl<'a> Parser<'a> { lhs = self.parse_assoc_op_cast(lhs, lhs_span, ExprKind::Cast)?; continue } else if op == AssocOp::Colon { - lhs = match self.parse_assoc_op_cast(lhs, lhs_span, ExprKind::Type) { - Ok(lhs) => lhs, - Err(mut err) => { - err.span_label(self.span, - "expecting a type here because of type ascription"); - let cm = self.sess.source_map(); - let cur_pos = cm.lookup_char_pos(self.span.lo()); - let op_pos = cm.lookup_char_pos(cur_op_span.hi()); - if cur_pos.line != op_pos.line { - err.span_suggestion( - cur_op_span, - "try using a semicolon", - ";".to_string(), - Applicability::MaybeIncorrect // speculative - ); - } - return Err(err); - } - }; + let maybe_path = self.could_ascription_be_path(&lhs.node); + self.last_type_ascription = Some((self.prev_span, maybe_path)); + + lhs = self.parse_assoc_op_cast(lhs, lhs_span, ExprKind::Type)?; continue } else if op == AssocOp::DotDot || op == AssocOp::DotDotEq { // If we didn’t have to handle `x..`/`x..=`, it would be pretty easy to @@ -3575,8 +2927,7 @@ impl<'a> Parser<'a> { // We have 2 alternatives here: `x..y`/`x..=y` and `x..`/`x..=` The other // two variants are handled with `parse_prefix_range_expr` call above. let rhs = if self.is_at_start_of_range_notation_rhs() { - Some(self.parse_assoc_expr_with(op.precedence() + 1, - LhsExpr::NotYetParsed)?) + Some(self.parse_assoc_expr_with(prec + 1, LhsExpr::NotYetParsed)?) } else { None }; @@ -3596,28 +2947,18 @@ impl<'a> Parser<'a> { break } - let rhs = match op.fixity() { - Fixity::Right => self.with_res( - restrictions - Restrictions::STMT_EXPR, - |this| { - this.parse_assoc_expr_with(op.precedence(), - LhsExpr::NotYetParsed) - }), - Fixity::Left => self.with_res( - restrictions - Restrictions::STMT_EXPR, - |this| { - this.parse_assoc_expr_with(op.precedence() + 1, - LhsExpr::NotYetParsed) - }), + let fixity = op.fixity(); + let prec_adjustment = match fixity { + Fixity::Right => 0, + Fixity::Left => 1, // We currently have no non-associative operators that are not handled above by // the special cases. The code is here only for future convenience. - Fixity::None => self.with_res( - restrictions - Restrictions::STMT_EXPR, - |this| { - this.parse_assoc_expr_with(op.precedence() + 1, - LhsExpr::NotYetParsed) - }), - }?; + Fixity::None => 1, + }; + let rhs = self.with_res( + restrictions - Restrictions::STMT_EXPR, + |this| this.parse_assoc_expr_with(prec + prec_adjustment, LhsExpr::NotYetParsed) + )?; // Make sure that the span of the parent node is larger than the span of lhs and rhs, // including the attributes. @@ -3638,10 +2979,7 @@ impl<'a> Parser<'a> { let binary = self.mk_binary(source_map::respan(cur_op_span, ast_op), lhs, rhs); self.mk_expr(span, binary, ThinVec::new()) } - AssocOp::Assign => - self.mk_expr(span, ExprKind::Assign(lhs, rhs), ThinVec::new()), - AssocOp::ObsoleteInPlace => - self.mk_expr(span, ExprKind::ObsoleteInPlace(lhs, rhs), ThinVec::new()), + AssocOp::Assign => self.mk_expr(span, ExprKind::Assign(lhs, rhs), ThinVec::new()), AssocOp::AssignOp(k) => { let aop = match k { token::Plus => BinOpKind::Add, @@ -3663,7 +3001,10 @@ impl<'a> Parser<'a> { } }; - if op.fixity() == Fixity::None { break } + if let Fixity::None = fixity { break } + } + if last_type_ascription_set { + self.last_type_ascription = None; } Ok(lhs) } @@ -3691,7 +3032,7 @@ impl<'a> Parser<'a> { match self.parse_path(PathStyle::Expr) { Ok(path) => { - let (op_noun, op_verb) = match self.token { + let (op_noun, op_verb) = match self.token.kind { token::Lt => ("comparison", "comparing"), token::BinOp(token::Shl) => ("shift", "shifting"), _ => { @@ -3711,26 +3052,29 @@ impl<'a> Parser<'a> { // in AST and continue parsing. let msg = format!("`<` is interpreted as a start of generic \ arguments for `{}`, not a {}", path, op_noun); - let mut err = self.sess.span_diagnostic.struct_span_err(self.span, &msg); - err.span_label(self.look_ahead_span(1).to(parser_snapshot_after_type.span), - "interpreted as generic arguments"); - err.span_label(self.span, format!("not interpreted as {}", op_noun)); - + let span_after_type = parser_snapshot_after_type.token.span; let expr = mk_expr(self, P(Ty { span: path.span, node: TyKind::Path(None, path), id: ast::DUMMY_NODE_ID })); - let expr_str = self.sess.source_map().span_to_snippet(expr.span) - .unwrap_or_else(|_| pprust::expr_to_string(&expr)); - err.span_suggestion( - expr.span, - &format!("try {} the cast value", op_verb), - format!("({})", expr_str), - Applicability::MachineApplicable - ); - err.emit(); + let expr_str = self.span_to_snippet(expr.span) + .unwrap_or_else(|_| pprust::expr_to_string(&expr)); + + self.struct_span_err(self.token.span, &msg) + .span_label( + self.look_ahead(1, |t| t.span).to(span_after_type), + "interpreted as generic arguments" + ) + .span_label(self.token.span, format!("not interpreted as {}", op_noun)) + .span_suggestion( + expr.span, + &format!("try {} the cast value", op_verb), + format!("({})", expr_str), + Applicability::MachineApplicable + ) + .emit(); Ok(expr) } @@ -3745,49 +3089,22 @@ impl<'a> Parser<'a> { } } - /// Produce an error if comparison operators are chained (RFC #558). - /// We only need to check lhs, not rhs, because all comparison ops - /// have same precedence and are left-associative - fn check_no_chained_comparison(&mut self, lhs: &Expr, outer_op: &AssocOp) { - debug_assert!(outer_op.is_comparison(), - "check_no_chained_comparison: {:?} is not comparison", - outer_op); - match lhs.node { - ExprKind::Binary(op, _, _) if op.node.is_comparison() => { - // respan to include both operators - let op_span = op.span.to(self.span); - let mut err = self.diagnostic().struct_span_err(op_span, - "chained comparison operators require parentheses"); - if op.node == BinOpKind::Lt && - *outer_op == AssocOp::Less || // Include `<` to provide this recommendation - *outer_op == AssocOp::Greater // even in a case like the following: - { // Foo<Bar<Baz<Qux, ()>>> - err.help( - "use `::<...>` instead of `<...>` if you meant to specify type arguments"); - err.help("or use `(...)` if you meant to specify fn arguments"); - } - err.emit(); - } - _ => {} - } - } - /// Parse prefix-forms of range notation: `..expr`, `..`, `..=expr` fn parse_prefix_range_expr(&mut self, already_parsed_attrs: Option<ThinVec<Attribute>>) -> PResult<'a, P<Expr>> { // Check for deprecated `...` syntax if self.token == token::DotDotDot { - self.err_dotdotdot_syntax(self.span); + self.err_dotdotdot_syntax(self.token.span); } - debug_assert!([token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token), + debug_assert!([token::DotDot, token::DotDotDot, token::DotDotEq].contains(&self.token.kind), "parse_prefix_range_expr: token {:?} is not DotDot/DotDotEq", self.token); let tok = self.token.clone(); let attrs = self.parse_or_use_outer_attributes(already_parsed_attrs)?; - let lo = self.span; - let mut hi = self.span; + let lo = self.token.span; + let mut hi = self.token.span; self.bump(); let opt_end = if self.is_at_start_of_range_notation_rhs() { // RHS must be parsed with more associativity than the dots. @@ -3798,7 +3115,7 @@ impl<'a> Parser<'a> { hi = x.span; x })?) - } else { + } else { None }; let limits = if tok == token::DotDot { @@ -3823,19 +3140,16 @@ impl<'a> Parser<'a> { } } - /// Parses an `if` or `if let` expression (`if` token already eaten). + /// Parses an `if` expression (`if` token already eaten). fn parse_if_expr(&mut self, attrs: ThinVec<Attribute>) -> PResult<'a, P<Expr>> { - if self.check_keyword(keywords::Let) { - return self.parse_if_let_expr(attrs); - } let lo = self.prev_span; - let cond = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, None)?; + let cond = self.parse_cond_expr()?; // Verify that the parsed `if` condition makes sense as a condition. If it is a block, then // verify that the last statement is either an implicit return (no `;`) or an explicit // return. This won't catch blocks with an explicit `return`, but that would be caught by // the dead code lint. - if self.eat_keyword(keywords::Else) || !cond.returns() { + if self.eat_keyword(kw::Else) || !cond.returns() { let sp = self.sess.source_map().next_point(lo); let mut err = self.diagnostic() .struct_span_err(sp, "missing condition for `if` statemement"); @@ -3851,7 +3165,7 @@ impl<'a> Parser<'a> { })?; let mut els: Option<P<Expr>> = None; let mut hi = thn.span; - if self.eat_keyword(keywords::Else) { + if self.eat_keyword(kw::Else) { let elexpr = self.parse_else_expr()?; hi = elexpr.span; els = Some(elexpr); @@ -3859,22 +3173,32 @@ impl<'a> Parser<'a> { Ok(self.mk_expr(lo.to(hi), ExprKind::If(cond, thn, els), attrs)) } - /// Parses an `if let` expression (`if` token already eaten). - fn parse_if_let_expr(&mut self, attrs: ThinVec<Attribute>) - -> PResult<'a, P<Expr>> { + /// Parse the condition of a `if`- or `while`-expression + fn parse_cond_expr(&mut self) -> PResult<'a, P<Expr>> { + let cond = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, None)?; + + if let ExprKind::Let(..) = cond.node { + // Remove the last feature gating of a `let` expression since it's stable. + let last = self.sess.let_chains_spans.borrow_mut().pop(); + debug_assert_eq!(cond.span, last.unwrap()); + } + + Ok(cond) + } + + /// Parses a `let $pats = $expr` pseudo-expression. + /// The `let` token has already been eaten. + fn parse_let_expr(&mut self, attrs: ThinVec<Attribute>) -> PResult<'a, P<Expr>> { let lo = self.prev_span; - self.expect_keyword(keywords::Let)?; let pats = self.parse_pats()?; self.expect(&token::Eq)?; - let expr = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, None)?; - let thn = self.parse_block()?; - let (hi, els) = if self.eat_keyword(keywords::Else) { - let expr = self.parse_else_expr()?; - (expr.span, Some(expr)) - } else { - (thn.span, None) - }; - Ok(self.mk_expr(lo.to(hi), ExprKind::IfLet(pats, expr, thn, els), attrs)) + let expr = self.with_res( + Restrictions::NO_STRUCT_LITERAL, + |this| this.parse_assoc_expr_with(1 + prec_let_scrutinee_needs_par(), None.into()) + )?; + let span = lo.to(expr.span); + self.sess.let_chains_spans.borrow_mut().push(span); + Ok(self.mk_expr(span, ExprKind::Let(pats, expr), attrs)) } /// Parses `move |args| expr`. @@ -3882,22 +3206,25 @@ impl<'a> Parser<'a> { attrs: ThinVec<Attribute>) -> PResult<'a, P<Expr>> { - let lo = self.span; - let movability = if self.eat_keyword(keywords::Static) { + let lo = self.token.span; + + let movability = if self.eat_keyword(kw::Static) { Movability::Static } else { Movability::Movable }; - let asyncness = if self.span.rust_2018() { + + let asyncness = if self.token.span.rust_2018() { self.parse_asyncness() } else { IsAsync::NotAsync }; - let capture_clause = if self.eat_keyword(keywords::Move) { - CaptureBy::Value - } else { - CaptureBy::Ref - }; + if asyncness.is_async() { + // Feature gate `async ||` closures. + self.sess.async_closure_spans.borrow_mut().push(self.prev_span); + } + + let capture_clause = self.parse_capture_clause(); let decl = self.parse_fn_block_decl()?; let decl_hi = self.prev_span; let body = match decl.output { @@ -3908,7 +3235,7 @@ impl<'a> Parser<'a> { _ => { // If an explicit return type is given, require a // block to appear (RFC 968). - let body_lo = self.span; + let body_lo = self.token.span; self.parse_block_expr(None, body_lo, BlockCheckMode::Default, ThinVec::new())? } }; @@ -3919,9 +3246,9 @@ impl<'a> Parser<'a> { attrs)) } - // `else` token already eaten + /// `else` token already eaten fn parse_else_expr(&mut self) -> PResult<'a, P<Expr>> { - if self.eat_keyword(keywords::If) { + if self.eat_keyword(kw::If) { return self.parse_if_expr(ThinVec::new()); } else { let blk = self.parse_block()?; @@ -3930,42 +3257,40 @@ impl<'a> Parser<'a> { } /// Parse a 'for' .. 'in' expression ('for' token already eaten) - fn parse_for_expr(&mut self, opt_label: Option<Label>, - span_lo: Span, - mut attrs: ThinVec<Attribute>) -> PResult<'a, P<Expr>> { + fn parse_for_expr( + &mut self, + opt_label: Option<Label>, + span_lo: Span, + mut attrs: ThinVec<Attribute> + ) -> PResult<'a, P<Expr>> { // Parse: `for <src_pat> in <src_expr> <src_loop_block>` + // Record whether we are about to parse `for (`. + // This is used below for recovery in case of `for ( $stuff ) $block` + // in which case we will suggest `for $stuff $block`. + let begin_paren = match self.token.kind { + token::OpenDelim(token::Paren) => Some(self.token.span), + _ => None, + }; + let pat = self.parse_top_level_pat()?; - if !self.eat_keyword(keywords::In) { - let in_span = self.prev_span.between(self.span); - let mut err = self.sess.span_diagnostic - .struct_span_err(in_span, "missing `in` in `for` loop"); - err.span_suggestion_short( - in_span, "try adding `in` here", " in ".into(), - // has been misleading, at least in the past (closed Issue #48492) - Applicability::MaybeIncorrect - ); - err.emit(); + if !self.eat_keyword(kw::In) { + let in_span = self.prev_span.between(self.token.span); + self.struct_span_err(in_span, "missing `in` in `for` loop") + .span_suggestion_short( + in_span, + "try adding `in` here", " in ".into(), + // has been misleading, at least in the past (closed Issue #48492) + Applicability::MaybeIncorrect + ) + .emit(); } let in_span = self.prev_span; - if self.eat_keyword(keywords::In) { - // a common typo: `for _ in in bar {}` - let mut err = self.sess.span_diagnostic.struct_span_err( - self.prev_span, - "expected iterable, found keyword `in`", - ); - err.span_suggestion_short( - in_span.until(self.prev_span), - "remove the duplicated `in`", - String::new(), - Applicability::MachineApplicable, - ); - err.note("if you meant to use emplacement syntax, it is obsolete (for now, anyway)"); - err.note("for more information on the status of emplacement syntax, see <\ - https://github.com/rust-lang/rust/issues/27779#issuecomment-378416911>"); - err.emit(); - } + self.check_for_for_in_in_typo(in_span); let expr = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, None)?; + + let pat = self.recover_parens_around_for_head(pat, &expr, begin_paren); + let (iattrs, loop_block) = self.parse_inner_attrs_and_block()?; attrs.extend(iattrs); @@ -3977,31 +3302,14 @@ impl<'a> Parser<'a> { fn parse_while_expr(&mut self, opt_label: Option<Label>, span_lo: Span, mut attrs: ThinVec<Attribute>) -> PResult<'a, P<Expr>> { - if self.token.is_keyword(keywords::Let) { - return self.parse_while_let_expr(opt_label, span_lo, attrs); - } - let cond = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, None)?; + let cond = self.parse_cond_expr()?; let (iattrs, body) = self.parse_inner_attrs_and_block()?; attrs.extend(iattrs); let span = span_lo.to(body.span); - return Ok(self.mk_expr(span, ExprKind::While(cond, body, opt_label), attrs)); + Ok(self.mk_expr(span, ExprKind::While(cond, body, opt_label), attrs)) } - /// Parses a `while let` expression (`while` token already eaten). - fn parse_while_let_expr(&mut self, opt_label: Option<Label>, - span_lo: Span, - mut attrs: ThinVec<Attribute>) -> PResult<'a, P<Expr>> { - self.expect_keyword(keywords::Let)?; - let pats = self.parse_pats()?; - self.expect(&token::Eq)?; - let expr = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, None)?; - let (iattrs, body) = self.parse_inner_attrs_and_block()?; - attrs.extend(iattrs); - let span = span_lo.to(body.span); - return Ok(self.mk_expr(span, ExprKind::WhileLet(pats, expr, body, opt_label), attrs)); - } - - // parse `loop {...}`, `loop` token already eaten + /// Parse `loop {...}`, `loop` token already eaten. fn parse_loop_expr(&mut self, opt_label: Option<Label>, span_lo: Span, mut attrs: ThinVec<Attribute>) -> PResult<'a, P<Expr>> { @@ -4011,17 +3319,20 @@ impl<'a> Parser<'a> { Ok(self.mk_expr(span, ExprKind::Loop(body, opt_label), attrs)) } - /// Parses an `async move {...}` expression. - pub fn parse_async_block(&mut self, mut attrs: ThinVec<Attribute>) - -> PResult<'a, P<Expr>> - { - let span_lo = self.span; - self.expect_keyword(keywords::Async)?; - let capture_clause = if self.eat_keyword(keywords::Move) { + /// Parse an optional `move` prefix to a closure lke construct. + fn parse_capture_clause(&mut self) -> CaptureBy { + if self.eat_keyword(kw::Move) { CaptureBy::Value } else { CaptureBy::Ref - }; + } + } + + /// Parses an `async move? {...}` expression. + pub fn parse_async_block(&mut self, mut attrs: ThinVec<Attribute>) -> PResult<'a, P<Expr>> { + let span_lo = self.token.span; + self.expect_keyword(kw::Async)?; + let capture_clause = self.parse_capture_clause(); let (iattrs, body) = self.parse_inner_attrs_and_block()?; attrs.extend(iattrs); Ok(self.mk_expr( @@ -4035,7 +3346,15 @@ impl<'a> Parser<'a> { { let (iattrs, body) = self.parse_inner_attrs_and_block()?; attrs.extend(iattrs); - Ok(self.mk_expr(span_lo.to(body.span), ExprKind::TryBlock(body), attrs)) + if self.eat_keyword(kw::Catch) { + let mut error = self.struct_span_err(self.prev_span, + "keyword `catch` cannot follow a `try` block"); + error.help("try using `match` on the result of the `try` block instead"); + error.emit(); + Err(error) + } else { + Ok(self.mk_expr(span_lo.to(body.span), ExprKind::TryBlock(body), attrs)) + } } // `match` token already eaten @@ -4045,7 +3364,7 @@ impl<'a> Parser<'a> { let discriminant = self.parse_expr_res(Restrictions::NO_STRUCT_LITERAL, None)?; if let Err(mut e) = self.expect(&token::OpenDelim(token::Brace)) { - if self.token == token::Token::Semi { + if self.token == token::Semi { e.span_suggestion_short( match_span, "try removing this `match`", @@ -4065,7 +3384,7 @@ impl<'a> Parser<'a> { // Recover by skipping to the end of the block. e.emit(); self.recover_stmt(); - let span = lo.to(self.span); + let span = lo.to(self.token.span); if self.token == token::CloseDelim(token::Brace) { self.bump(); } @@ -4073,24 +3392,23 @@ impl<'a> Parser<'a> { } } } - let hi = self.span; + let hi = self.token.span; self.bump(); return Ok(self.mk_expr(lo.to(hi), ExprKind::Match(discriminant, arms), attrs)); } crate fn parse_arm(&mut self) -> PResult<'a, Arm> { - maybe_whole!(self, NtArm, |x| x); - let attrs = self.parse_outer_attributes()?; + let lo = self.token.span; let pats = self.parse_pats()?; - let guard = if self.eat_keyword(keywords::If) { - Some(Guard::If(self.parse_expr()?)) + let guard = if self.eat_keyword(kw::If) { + Some(self.parse_expr()?) } else { None }; - let arrow_span = self.span; + let arrow_span = self.token.span; self.expect(&token::FatArrow)?; - let arm_start_span = self.span; + let arm_start_span = self.token.span; let expr = self.parse_expr_res(Restrictions::STMT_EXPR, None) .map_err(|mut err| { @@ -4101,6 +3419,8 @@ impl<'a> Parser<'a> { let require_comma = classify::expr_requires_semi_to_be_stmt(&expr) && self.token != token::CloseDelim(token::Brace); + let hi = self.token.span; + if require_comma { let cm = self.sess.source_map(); self.expect_one_of(&[token::Comma], &[token::CloseDelim(token::Brace)]) @@ -4118,7 +3438,7 @@ impl<'a> Parser<'a> { // | | // | arrow_span // X | &X => "x" - // | - ^^ self.span + // | - ^^ self.token.span // | | // | parsed until here as `"y" & X` err.span_suggestion_short( @@ -4144,6 +3464,7 @@ impl<'a> Parser<'a> { pats, guard, body: expr, + span: lo.to(hi), }) } @@ -4196,15 +3517,14 @@ impl<'a> Parser<'a> { pats.push(self.parse_top_level_pat()?); if self.token == token::OrOr { - let mut err = self.struct_span_err(self.span, - "unexpected token `||` after pattern"); - err.span_suggestion( - self.span, - "use a single `|` to specify multiple patterns", - "|".to_owned(), - Applicability::MachineApplicable - ); - err.emit(); + self.struct_span_err(self.token.span, "unexpected token `||` after pattern") + .span_suggestion( + self.token.span, + "use a single `|` to specify multiple patterns", + "|".to_owned(), + Applicability::MachineApplicable + ) + .emit(); self.bump(); } else if self.eat(&token::BinOp(token::Or)) { // This is a No-op. Continue the loop to parse the next @@ -4215,110 +3535,6 @@ impl<'a> Parser<'a> { }; } - // Parses a parenthesized list of patterns like - // `()`, `(p)`, `(p,)`, `(p, q)`, or `(p, .., q)`. Returns: - // - a vector of the patterns that were parsed - // - an option indicating the index of the `..` element - // - a boolean indicating whether a trailing comma was present. - // Trailing commas are significant because (p) and (p,) are different patterns. - fn parse_parenthesized_pat_list(&mut self) -> PResult<'a, (Vec<P<Pat>>, Option<usize>, bool)> { - self.expect(&token::OpenDelim(token::Paren))?; - let result = self.parse_pat_list()?; - self.expect(&token::CloseDelim(token::Paren))?; - Ok(result) - } - - fn parse_pat_list(&mut self) -> PResult<'a, (Vec<P<Pat>>, Option<usize>, bool)> { - let mut fields = Vec::new(); - let mut ddpos = None; - let mut trailing_comma = false; - loop { - if self.eat(&token::DotDot) { - if ddpos.is_none() { - ddpos = Some(fields.len()); - } else { - // Emit a friendly error, ignore `..` and continue parsing - self.struct_span_err( - self.prev_span, - "`..` can only be used once per tuple or tuple struct pattern", - ) - .span_label(self.prev_span, "can only be used once per pattern") - .emit(); - } - } else if !self.check(&token::CloseDelim(token::Paren)) { - fields.push(self.parse_pat(None)?); - } else { - break - } - - trailing_comma = self.eat(&token::Comma); - if !trailing_comma { - break - } - } - - if ddpos == Some(fields.len()) && trailing_comma { - // `..` needs to be followed by `)` or `, pat`, `..,)` is disallowed. - let msg = "trailing comma is not permitted after `..`"; - self.struct_span_err(self.prev_span, msg) - .span_label(self.prev_span, msg) - .emit(); - } - - Ok((fields, ddpos, trailing_comma)) - } - - fn parse_pat_vec_elements( - &mut self, - ) -> PResult<'a, (Vec<P<Pat>>, Option<P<Pat>>, Vec<P<Pat>>)> { - let mut before = Vec::new(); - let mut slice = None; - let mut after = Vec::new(); - let mut first = true; - let mut before_slice = true; - - while self.token != token::CloseDelim(token::Bracket) { - if first { - first = false; - } else { - self.expect(&token::Comma)?; - - if self.token == token::CloseDelim(token::Bracket) - && (before_slice || !after.is_empty()) { - break - } - } - - if before_slice { - if self.eat(&token::DotDot) { - - if self.check(&token::Comma) || - self.check(&token::CloseDelim(token::Bracket)) { - slice = Some(P(Pat { - id: ast::DUMMY_NODE_ID, - node: PatKind::Wild, - span: self.prev_span, - })); - before_slice = false; - } - continue - } - } - - let subpat = self.parse_pat(None)?; - if before_slice && self.eat(&token::DotDot) { - slice = Some(subpat); - before_slice = false; - } else if before_slice { - before.push(subpat); - } else { - after.push(subpat); - } - } - - Ok((before, slice, after)) - } - fn parse_pat_field( &mut self, lo: Span, @@ -4335,10 +3551,10 @@ impl<'a> Parser<'a> { (pat, fieldname, false) } else { // Parsing a pattern of the form "(box) (ref) (mut) fieldname" - let is_box = self.eat_keyword(keywords::Box); - let boxed_span = self.span; - let is_ref = self.eat_keyword(keywords::Ref); - let is_mut = self.eat_keyword(keywords::Mut); + let is_box = self.eat_keyword(kw::Box); + let boxed_span = self.token.span; + let is_ref = self.eat_keyword(kw::Ref); + let is_mut = self.eat_keyword(kw::Mut); let fieldname = self.parse_ident()?; hi = self.prev_span; @@ -4386,8 +3602,16 @@ impl<'a> Parser<'a> { let mut etc_span = None; while self.token != token::CloseDelim(token::Brace) { - let attrs = self.parse_outer_attributes()?; - let lo = self.span; + let attrs = match self.parse_outer_attributes() { + Ok(attrs) => attrs, + Err(err) => { + if let Some(mut delayed) = delayed_err { + delayed.emit(); + } + return Err(err); + }, + }; + let lo = self.token.span; // check that a comma comes after every field if !ate_comma { @@ -4401,19 +3625,18 @@ impl<'a> Parser<'a> { if self.check(&token::DotDot) || self.token == token::DotDotDot { etc = true; - let mut etc_sp = self.span; + let mut etc_sp = self.token.span; if self.token == token::DotDotDot { // Issue #46718 // Accept `...` as if it were `..` to avoid further errors - let mut err = self.struct_span_err(self.span, - "expected field pattern, found `...`"); - err.span_suggestion( - self.span, - "to omit remaining fields, use one fewer `.`", - "..".to_owned(), - Applicability::MachineApplicable - ); - err.emit(); + self.struct_span_err(self.token.span, "expected field pattern, found `...`") + .span_suggestion( + self.token.span, + "to omit remaining fields, use one fewer `.`", + "..".to_owned(), + Applicability::MachineApplicable + ) + .emit(); } self.bump(); // `..` || `...` @@ -4424,18 +3647,19 @@ impl<'a> Parser<'a> { let token_str = self.this_token_descr(); let mut err = self.fatal(&format!("expected `}}`, found {}", token_str)); - err.span_label(self.span, "expected `}`"); + err.span_label(self.token.span, "expected `}`"); let mut comma_sp = None; if self.token == token::Comma { // Issue #49257 - etc_sp = etc_sp.to(self.sess.source_map().span_until_non_whitespace(self.span)); + let nw_span = self.sess.source_map().span_until_non_whitespace(self.token.span); + etc_sp = etc_sp.to(nw_span); err.span_label(etc_sp, "`..` must be at the end and cannot have a trailing comma"); - comma_sp = Some(self.span); + comma_sp = Some(self.token.span); self.bump(); ate_comma = true; } - etc_span = Some(etc_sp.until(self.span)); + etc_span = Some(etc_sp.until(self.token.span)); if self.token == token::CloseDelim(token::Brace) { // If the struct looks otherwise well formed, recover and continue. if let Some(sp) = comma_sp { @@ -4485,7 +3709,7 @@ impl<'a> Parser<'a> { "move the `..` to the end of the field list", vec![ (etc_span, String::new()), - (self.span, format!("{}.. }}", if ate_comma { "" } else { ", " })), + (self.token.span, format!("{}.. }}", if ate_comma { "" } else { ", " })), ], Applicability::MachineApplicable, ); @@ -4497,7 +3721,7 @@ impl<'a> Parser<'a> { fn parse_pat_range_end(&mut self) -> PResult<'a, P<Expr>> { if self.token.is_path_start() { - let lo = self.span; + let lo = self.token.span; let (qself, path) = if self.eat_lt() { // Parse a qualified path let (qself, path) = self.parse_qpath(PathStyle::Expr)?; @@ -4513,20 +3737,35 @@ impl<'a> Parser<'a> { } } - // helper function to decide whether to parse as ident binding or to try to do - // something more complex like range patterns + /// Is the current token suitable as the start of a range patterns end? + fn is_pat_range_end_start(&self) -> bool { + self.token.is_path_start() // e.g. `MY_CONST`; + || self.token == token::Dot // e.g. `.5` for recovery; + || self.token.can_begin_literal_or_bool() // e.g. `42`. + || self.token.is_whole_expr() + } + + // Helper function to decide whether to parse as ident binding + // or to try to do something more complex like range patterns. fn parse_as_ident(&mut self) -> bool { - self.look_ahead(1, |t| match *t { + self.look_ahead(1, |t| match t.kind { token::OpenDelim(token::Paren) | token::OpenDelim(token::Brace) | - token::DotDotDot | token::DotDotEq | token::ModSep | token::Not => Some(false), - // ensure slice patterns [a, b.., c] and [a, b, c..] don't go into the - // range pattern branch - token::DotDot => None, - _ => Some(true), - }).unwrap_or_else(|| self.look_ahead(2, |t| match *t { - token::Comma | token::CloseDelim(token::Bracket) => true, - _ => false, - })) + token::DotDotDot | token::DotDotEq | token::DotDot | + token::ModSep | token::Not => false, + _ => true, + }) + } + + /// Parse and throw away a parentesized comma separated + /// sequence of patterns until `)` is reached. + fn skip_pat_list(&mut self) -> PResult<'a, ()> { + while !self.check(&token::CloseDelim(token::Paren)) { + self.parse_pat(None)?; + if !self.eat(&token::Comma) { + return Ok(()) + } + } + Ok(()) } /// A wrapper around `parse_pat` with some special error handling for the @@ -4540,9 +3779,9 @@ impl<'a> Parser<'a> { // parentheses in what should have been a tuple pattern; return a // suggestion-enhanced error here rather than choking on the comma // later. - let comma_span = self.span; + let comma_span = self.token.span; self.bump(); - if let Err(mut err) = self.parse_pat_list() { + if let Err(mut err) = self.skip_pat_list() { // We didn't expect this to work anyway; we just wanted // to advance to the end of the comma-sequence so we know // the span to suggest parenthesizing @@ -4551,7 +3790,7 @@ impl<'a> Parser<'a> { let seq_span = pat.span.to(self.prev_span); let mut err = self.struct_span_err(comma_span, "unexpected `,` in pattern"); - if let Ok(seq_snippet) = self.sess.source_map().span_to_snippet(seq_span) { + if let Ok(seq_snippet) = self.span_to_snippet(seq_span) { err.span_suggestion( seq_span, "try adding parentheses to match on a tuple..", @@ -4574,6 +3813,53 @@ impl<'a> Parser<'a> { self.parse_pat_with_range_pat(true, expected) } + /// Parse a range-to pattern, e.g. `..X` and `..=X` for recovery. + fn parse_pat_range_to(&mut self, re: RangeEnd, form: &str) -> PResult<'a, PatKind> { + let lo = self.prev_span; + let end = self.parse_pat_range_end()?; + let range_span = lo.to(end.span); + let begin = self.mk_expr(range_span, ExprKind::Err, ThinVec::new()); + + self.diagnostic() + .struct_span_err(range_span, &format!("`{}X` range patterns are not supported", form)) + .span_suggestion( + range_span, + "try using the minimum value for the type", + format!("MIN{}{}", form, pprust::expr_to_string(&end)), + Applicability::HasPlaceholders, + ) + .emit(); + + Ok(PatKind::Range(begin, end, respan(lo, re))) + } + + /// Parse the end of a `X..Y`, `X..=Y`, or `X...Y` range pattern or recover + /// if that end is missing treating it as `X..`, `X..=`, or `X...` respectively. + fn parse_pat_range_end_opt(&mut self, begin: &Expr, form: &str) -> PResult<'a, P<Expr>> { + if self.is_pat_range_end_start() { + // Parsing e.g. `X..=Y`. + self.parse_pat_range_end() + } else { + // Parsing e.g. `X..`. + let range_span = begin.span.to(self.prev_span); + + self.diagnostic() + .struct_span_err( + range_span, + &format!("`X{}` range patterns are not supported", form), + ) + .span_suggestion( + range_span, + "try using the maximum value for the type", + format!("{}{}MAX", pprust::expr_to_string(&begin), form), + Applicability::HasPlaceholders, + ) + .emit(); + + Ok(self.mk_expr(range_span, ExprKind::Err, ThinVec::new())) + } + } + /// Parses a pattern, with a setting whether modern range patterns (e.g., `a..=b`, `a..b` are /// allowed). fn parse_pat_with_range_pat( @@ -4581,48 +3867,69 @@ impl<'a> Parser<'a> { allow_range_pat: bool, expected: Option<&'static str>, ) -> PResult<'a, P<Pat>> { + maybe_recover_from_interpolated_ty_qpath!(self, true); maybe_whole!(self, NtPat, |x| x); - let lo = self.span; + let lo = self.token.span; let pat; - match self.token { + match self.token.kind { token::BinOp(token::And) | token::AndAnd => { // Parse &pat / &mut pat self.expect_and()?; let mutbl = self.parse_mutability(); - if let token::Lifetime(ident) = self.token { - let mut err = self.fatal(&format!("unexpected lifetime `{}` in pattern", - ident)); - err.span_label(self.span, "unexpected lifetime"); + if let token::Lifetime(name) = self.token.kind { + let mut err = self.fatal(&format!("unexpected lifetime `{}` in pattern", name)); + err.span_label(self.token.span, "unexpected lifetime"); return Err(err); } let subpat = self.parse_pat_with_range_pat(false, expected)?; pat = PatKind::Ref(subpat, mutbl); } token::OpenDelim(token::Paren) => { - // Parse (pat,pat,pat,...) as tuple pattern - let (fields, ddpos, trailing_comma) = self.parse_parenthesized_pat_list()?; - pat = if fields.len() == 1 && ddpos.is_none() && !trailing_comma { + // Parse a tuple or parenthesis pattern. + let (fields, trailing_comma) = self.parse_paren_comma_seq(|p| p.parse_pat(None))?; + + // Here, `(pat,)` is a tuple pattern. + // For backward compatibility, `(..)` is a tuple pattern as well. + pat = if fields.len() == 1 && !(trailing_comma || fields[0].is_rest()) { PatKind::Paren(fields.into_iter().nth(0).unwrap()) } else { - PatKind::Tuple(fields, ddpos) + PatKind::Tuple(fields) }; } token::OpenDelim(token::Bracket) => { - // Parse [pat,pat,...] as slice pattern + // Parse `[pat, pat,...]` as a slice pattern. + let (slice, _) = self.parse_delim_comma_seq(token::Bracket, |p| p.parse_pat(None))?; + pat = PatKind::Slice(slice); + } + token::DotDot => { + self.bump(); + pat = if self.is_pat_range_end_start() { + // Parse `..42` for recovery. + self.parse_pat_range_to(RangeEnd::Excluded, "..")? + } else { + // A rest pattern `..`. + PatKind::Rest + }; + } + token::DotDotEq => { + // Parse `..=42` for recovery. + self.bump(); + pat = self.parse_pat_range_to(RangeEnd::Included(RangeSyntax::DotDotEq), "..=")?; + } + token::DotDotDot => { + // Parse `...42` for recovery. self.bump(); - let (before, slice, after) = self.parse_pat_vec_elements()?; - self.expect(&token::CloseDelim(token::Bracket))?; - pat = PatKind::Slice(before, slice, after); + pat = self.parse_pat_range_to(RangeEnd::Included(RangeSyntax::DotDotDot), "...")?; } // At this point, token != &, &&, (, [ - _ => if self.eat_keyword(keywords::Underscore) { + _ => if self.eat_keyword(kw::Underscore) { // Parse _ pat = PatKind::Wild; - } else if self.eat_keyword(keywords::Mut) { + } else if self.eat_keyword(kw::Mut) { // Parse mut ident @ pat / mut ref ident @ pat - let mutref_span = self.prev_span.to(self.span); - let binding_mode = if self.eat_keyword(keywords::Ref) { + let mutref_span = self.prev_span.to(self.token.span); + let binding_mode = if self.eat_keyword(kw::Ref) { self.diagnostic() .struct_span_err(mutref_span, "the order of `mut` and `ref` is incorrect") .span_suggestion( @@ -4636,11 +3943,11 @@ impl<'a> Parser<'a> { BindingMode::ByValue(Mutability::Mutable) }; pat = self.parse_pat_ident(binding_mode)?; - } else if self.eat_keyword(keywords::Ref) { + } else if self.eat_keyword(kw::Ref) { // Parse ref ident @ pat / ref mut ident @ pat let mutbl = self.parse_mutability(); pat = self.parse_pat_ident(BindingMode::ByRef(mutbl))?; - } else if self.eat_keyword(keywords::Box) { + } else if self.eat_keyword(kw::Box) { // Parse box pat let subpat = self.parse_pat_with_range_pat(false, None)?; pat = PatKind::Box(subpat); @@ -4661,36 +3968,40 @@ impl<'a> Parser<'a> { // Parse an unqualified path (None, self.parse_path(PathStyle::Expr)?) }; - match self.token { + match self.token.kind { token::Not if qself.is_none() => { // Parse macro invocation self.bump(); let (delim, tts) = self.expect_delimited_token_tree()?; - let mac = respan(lo.to(self.prev_span), Mac_ { path, tts, delim }); + let mac = respan(lo.to(self.prev_span), Mac_ { + path, + tts, + delim, + prior_type_ascription: self.last_type_ascription, + }); pat = PatKind::Mac(mac); } token::DotDotDot | token::DotDotEq | token::DotDot => { - let end_kind = match self.token { - token::DotDot => RangeEnd::Excluded, - token::DotDotDot => RangeEnd::Included(RangeSyntax::DotDotDot), - token::DotDotEq => RangeEnd::Included(RangeSyntax::DotDotEq), + let (end_kind, form) = match self.token.kind { + token::DotDot => (RangeEnd::Excluded, ".."), + token::DotDotDot => (RangeEnd::Included(RangeSyntax::DotDotDot), "..."), + token::DotDotEq => (RangeEnd::Included(RangeSyntax::DotDotEq), "..="), _ => panic!("can only parse `..`/`...`/`..=` for ranges \ (checked above)"), }; - let op_span = self.span; + let op_span = self.token.span; // Parse range let span = lo.to(self.prev_span); let begin = self.mk_expr(span, ExprKind::Path(qself, path), ThinVec::new()); self.bump(); - let end = self.parse_pat_range_end()?; - let op = Spanned { span: op_span, node: end_kind }; - pat = PatKind::Range(begin, end, op); + let end = self.parse_pat_range_end_opt(&begin, form)?; + pat = PatKind::Range(begin, end, respan(op_span, end_kind)); } token::OpenDelim(token::Brace) => { if qself.is_some() { let msg = "unexpected `{` after qualified path"; let mut err = self.fatal(msg); - err.span_label(self.span, msg); + err.span_label(self.token.span, msg); return Err(err); } // Parse struct pattern @@ -4698,7 +4009,7 @@ impl<'a> Parser<'a> { let (fields, etc) = self.parse_pat_fields().unwrap_or_else(|mut e| { e.emit(); self.recover_stmt(); - (vec![], false) + (vec![], true) }); self.bump(); pat = PatKind::Struct(path, fields, etc); @@ -4707,12 +4018,12 @@ impl<'a> Parser<'a> { if qself.is_some() { let msg = "unexpected `(` after qualified path"; let mut err = self.fatal(msg); - err.span_label(self.span, msg); + err.span_label(self.token.span, msg); return Err(err); } // Parse tuple struct or enum pattern - let (fields, ddpos, _) = self.parse_parenthesized_pat_list()?; - pat = PatKind::TupleStruct(path, fields, ddpos) + let (fields, _) = self.parse_paren_comma_seq(|p| p.parse_pat(None))?; + pat = PatKind::TupleStruct(path, fields) } _ => pat = PatKind::Path(qself, path), } @@ -4720,22 +4031,21 @@ impl<'a> Parser<'a> { // Try to parse everything else as literal with optional minus match self.parse_literal_maybe_minus() { Ok(begin) => { - let op_span = self.span; + let op_span = self.token.span; if self.check(&token::DotDot) || self.check(&token::DotDotEq) || self.check(&token::DotDotDot) { - let end_kind = if self.eat(&token::DotDotDot) { - RangeEnd::Included(RangeSyntax::DotDotDot) + let (end_kind, form) = if self.eat(&token::DotDotDot) { + (RangeEnd::Included(RangeSyntax::DotDotDot), "...") } else if self.eat(&token::DotDotEq) { - RangeEnd::Included(RangeSyntax::DotDotEq) + (RangeEnd::Included(RangeSyntax::DotDotEq), "..=") } else if self.eat(&token::DotDot) { - RangeEnd::Excluded + (RangeEnd::Excluded, "..") } else { panic!("impossible case: we already matched \ on a range-operator token") }; - let end = self.parse_pat_range_end()?; - let op = Spanned { span: op_span, node: end_kind }; - pat = PatKind::Range(begin, end, op); + let end = self.parse_pat_range_end_opt(&begin, form)?; + pat = PatKind::Range(begin, end, respan(op_span, end_kind)) } else { pat = PatKind::Lit(begin); } @@ -4749,14 +4059,18 @@ impl<'a> Parser<'a> { self.this_token_descr(), ); let mut err = self.fatal(&msg); - err.span_label(self.span, format!("expected {}", expected)); + err.span_label(self.token.span, format!("expected {}", expected)); + let sp = self.sess.source_map().start_point(self.token.span); + if let Some(sp) = self.sess.ambiguous_block_expr_parse.borrow().get(&sp) { + self.sess.expr_parentheses_needed(&mut err, *sp, None); + } return Err(err); } } } } - let pat = Pat { node: pat, span: lo.to(self.prev_span), id: ast::DUMMY_NODE_ID }; + let pat = P(Pat { node: pat, span: lo.to(self.prev_span), id: ast::DUMMY_NODE_ID }); let pat = self.maybe_recover_from_bad_qpath(pat, true)?; if !allow_range_pat { @@ -4782,7 +4096,7 @@ impl<'a> Parser<'a> { } } - Ok(P(pat)) + Ok(pat) } /// Parses `ident` or `ident @ pat`. @@ -4830,7 +4144,7 @@ impl<'a> Parser<'a> { let parser_snapshot_after_type = self.clone(); mem::replace(self, parser_snapshot_before_type); - let snippet = self.sess.source_map().span_to_snippet(pat.span).unwrap(); + let snippet = self.span_to_snippet(pat.span).unwrap(); err.span_label(pat.span, format!("while parsing the type for `{}`", snippet)); (Some((parser_snapshot_after_type, colon_sp, err)), None) } @@ -4872,7 +4186,7 @@ impl<'a> Parser<'a> { } }; let hi = if self.token == token::Semi { - self.span + self.token.span } else { self.prev_span }; @@ -4925,92 +4239,6 @@ impl<'a> Parser<'a> { Ok(self.parse_stmt_(true)) } - // Eat tokens until we can be relatively sure we reached the end of the - // statement. This is something of a best-effort heuristic. - // - // We terminate when we find an unmatched `}` (without consuming it). - fn recover_stmt(&mut self) { - self.recover_stmt_(SemiColonMode::Ignore, BlockMode::Ignore) - } - - // If `break_on_semi` is `Break`, then we will stop consuming tokens after - // finding (and consuming) a `;` outside of `{}` or `[]` (note that this is - // approximate - it can mean we break too early due to macros, but that - // should only lead to sub-optimal recovery, not inaccurate parsing). - // - // If `break_on_block` is `Break`, then we will stop consuming tokens - // after finding (and consuming) a brace-delimited block. - fn recover_stmt_(&mut self, break_on_semi: SemiColonMode, break_on_block: BlockMode) { - let mut brace_depth = 0; - let mut bracket_depth = 0; - let mut in_block = false; - debug!("recover_stmt_ enter loop (semi={:?}, block={:?})", - break_on_semi, break_on_block); - loop { - debug!("recover_stmt_ loop {:?}", self.token); - match self.token { - token::OpenDelim(token::DelimToken::Brace) => { - brace_depth += 1; - self.bump(); - if break_on_block == BlockMode::Break && - brace_depth == 1 && - bracket_depth == 0 { - in_block = true; - } - } - token::OpenDelim(token::DelimToken::Bracket) => { - bracket_depth += 1; - self.bump(); - } - token::CloseDelim(token::DelimToken::Brace) => { - if brace_depth == 0 { - debug!("recover_stmt_ return - close delim {:?}", self.token); - break; - } - brace_depth -= 1; - self.bump(); - if in_block && bracket_depth == 0 && brace_depth == 0 { - debug!("recover_stmt_ return - block end {:?}", self.token); - break; - } - } - token::CloseDelim(token::DelimToken::Bracket) => { - bracket_depth -= 1; - if bracket_depth < 0 { - bracket_depth = 0; - } - self.bump(); - } - token::Eof => { - debug!("recover_stmt_ return - Eof"); - break; - } - token::Semi => { - self.bump(); - if break_on_semi == SemiColonMode::Break && - brace_depth == 0 && - bracket_depth == 0 { - debug!("recover_stmt_ return - Semi"); - break; - } - } - token::Comma => { - if break_on_semi == SemiColonMode::Comma && - brace_depth == 0 && - bracket_depth == 0 { - debug!("recover_stmt_ return - Semi"); - break; - } else { - self.bump(); - } - } - _ => { - self.bump() - } - } - } - } - fn parse_stmt_(&mut self, macro_legacy_warnings: bool) -> Option<Stmt> { self.parse_stmt_without_recovery(macro_legacy_warnings).unwrap_or_else(|mut e| { e.emit(); @@ -5019,11 +4247,11 @@ impl<'a> Parser<'a> { }) } - fn is_async_block(&mut self) -> bool { - self.token.is_keyword(keywords::Async) && + fn is_async_block(&self) -> bool { + self.token.is_keyword(kw::Async) && ( ( // `async move {` - self.look_ahead(1, |t| t.is_keyword(keywords::Move)) && + self.is_keyword_ahead(1, &[kw::Move]) && self.look_ahead(2, |t| *t == token::OpenDelim(token::Brace)) ) || ( // `async {` self.look_ahead(1, |t| *t == token::OpenDelim(token::Brace)) @@ -5031,117 +4259,107 @@ impl<'a> Parser<'a> { ) } - fn is_async_fn(&mut self) -> bool { - self.token.is_keyword(keywords::Async) && - self.look_ahead(1, |t| t.is_keyword(keywords::Fn)) + fn is_async_fn(&self) -> bool { + self.token.is_keyword(kw::Async) && + self.is_keyword_ahead(1, &[kw::Fn]) } - fn is_do_catch_block(&mut self) -> bool { - self.token.is_keyword(keywords::Do) && - self.look_ahead(1, |t| t.is_keyword(keywords::Catch)) && + fn is_do_catch_block(&self) -> bool { + self.token.is_keyword(kw::Do) && + self.is_keyword_ahead(1, &[kw::Catch]) && self.look_ahead(2, |t| *t == token::OpenDelim(token::Brace)) && !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL) } - fn is_try_block(&mut self) -> bool { - self.token.is_keyword(keywords::Try) && + fn is_try_block(&self) -> bool { + self.token.is_keyword(kw::Try) && self.look_ahead(1, |t| *t == token::OpenDelim(token::Brace)) && - self.span.rust_2018() && + self.token.span.rust_2018() && // prevent `while try {} {}`, `if try {} {} else {}`, etc. !self.restrictions.contains(Restrictions::NO_STRUCT_LITERAL) } fn is_union_item(&self) -> bool { - self.token.is_keyword(keywords::Union) && + self.token.is_keyword(kw::Union) && self.look_ahead(1, |t| t.is_ident() && !t.is_reserved_ident()) } fn is_crate_vis(&self) -> bool { - self.token.is_keyword(keywords::Crate) && self.look_ahead(1, |t| t != &token::ModSep) - } - - fn is_existential_type_decl(&self) -> bool { - self.token.is_keyword(keywords::Existential) && - self.look_ahead(1, |t| t.is_keyword(keywords::Type)) + self.token.is_keyword(kw::Crate) && self.look_ahead(1, |t| t != &token::ModSep) } - fn is_auto_trait_item(&mut self) -> bool { + fn is_auto_trait_item(&self) -> bool { // auto trait - (self.token.is_keyword(keywords::Auto) - && self.look_ahead(1, |t| t.is_keyword(keywords::Trait))) + (self.token.is_keyword(kw::Auto) && + self.is_keyword_ahead(1, &[kw::Trait])) || // unsafe auto trait - (self.token.is_keyword(keywords::Unsafe) && - self.look_ahead(1, |t| t.is_keyword(keywords::Auto)) && - self.look_ahead(2, |t| t.is_keyword(keywords::Trait))) + (self.token.is_keyword(kw::Unsafe) && + self.is_keyword_ahead(1, &[kw::Auto]) && + self.is_keyword_ahead(2, &[kw::Trait])) } fn eat_macro_def(&mut self, attrs: &[Attribute], vis: &Visibility, lo: Span) -> PResult<'a, Option<P<Item>>> { - let token_lo = self.span; - let (ident, def) = match self.token { - token::Ident(ident, false) if ident.name == keywords::Macro.name() => { - self.bump(); - let ident = self.parse_ident()?; - let tokens = if self.check(&token::OpenDelim(token::Brace)) { - match self.parse_token_tree() { - TokenTree::Delimited(_, _, tts) => tts, - _ => unreachable!(), - } - } else if self.check(&token::OpenDelim(token::Paren)) { - let args = self.parse_token_tree(); - let body = if self.check(&token::OpenDelim(token::Brace)) { - self.parse_token_tree() - } else { - self.unexpected()?; - unreachable!() - }; - TokenStream::new(vec![ - args.into(), - TokenTree::Token(token_lo.to(self.prev_span), token::FatArrow).into(), - body.into(), - ]) + let token_lo = self.token.span; + let (ident, def) = if self.eat_keyword(kw::Macro) { + let ident = self.parse_ident()?; + let tokens = if self.check(&token::OpenDelim(token::Brace)) { + match self.parse_token_tree() { + TokenTree::Delimited(_, _, tts) => tts, + _ => unreachable!(), + } + } else if self.check(&token::OpenDelim(token::Paren)) { + let args = self.parse_token_tree(); + let body = if self.check(&token::OpenDelim(token::Brace)) { + self.parse_token_tree() } else { self.unexpected()?; unreachable!() }; + TokenStream::new(vec![ + args.into(), + TokenTree::token(token::FatArrow, token_lo.to(self.prev_span)).into(), + body.into(), + ]) + } else { + self.unexpected()?; + unreachable!() + }; - (ident, ast::MacroDef { tokens: tokens.into(), legacy: false }) - } - token::Ident(ident, _) if ident.name == "macro_rules" && - self.look_ahead(1, |t| *t == token::Not) => { - let prev_span = self.prev_span; - self.complain_if_pub_macro(&vis.node, prev_span); - self.bump(); - self.bump(); - - let ident = self.parse_ident()?; - let (delim, tokens) = self.expect_delimited_token_tree()?; - if delim != MacDelimiter::Brace { - if !self.eat(&token::Semi) { - let msg = "macros that expand to items must either \ - be surrounded with braces or followed by a semicolon"; - self.span_err(self.prev_span, msg); - } - } + (ident, ast::MacroDef { tokens: tokens.into(), legacy: false }) + } else if self.check_keyword(sym::macro_rules) && + self.look_ahead(1, |t| *t == token::Not) && + self.look_ahead(2, |t| t.is_ident()) { + let prev_span = self.prev_span; + self.complain_if_pub_macro(&vis.node, prev_span); + self.bump(); + self.bump(); - (ident, ast::MacroDef { tokens: tokens, legacy: true }) + let ident = self.parse_ident()?; + let (delim, tokens) = self.expect_delimited_token_tree()?; + if delim != MacDelimiter::Brace && !self.eat(&token::Semi) { + self.report_invalid_macro_expansion_item(); } - _ => return Ok(None), + + (ident, ast::MacroDef { tokens, legacy: true }) + } else { + return Ok(None); }; let span = lo.to(self.prev_span); Ok(Some(self.mk_item(span, ident, ItemKind::MacroDef(def), vis.clone(), attrs.to_vec()))) } - fn parse_stmt_without_recovery(&mut self, - macro_legacy_warnings: bool) - -> PResult<'a, Option<Stmt>> { + fn parse_stmt_without_recovery( + &mut self, + macro_legacy_warnings: bool, + ) -> PResult<'a, Option<Stmt>> { maybe_whole!(self, NtStmt, |x| Some(x)); let attrs = self.parse_outer_attributes()?; - let lo = self.span; + let lo = self.token.span; - Ok(Some(if self.eat_keyword(keywords::Let) { + Ok(Some(if self.eat_keyword(kw::Let) { Stmt { id: ast::DUMMY_NODE_ID, node: StmtKind::Local(self.parse_local(attrs.into())?), @@ -5167,17 +4385,16 @@ impl<'a> Parser<'a> { !self.token.is_qpath_start() && !self.is_union_item() && !self.is_crate_vis() && - !self.is_existential_type_decl() && !self.is_auto_trait_item() && !self.is_async_fn() { - let pth = self.parse_path(PathStyle::Expr)?; + let path = self.parse_path(PathStyle::Expr)?; if !self.eat(&token::Not) { let expr = if self.check(&token::OpenDelim(token::Brace)) { - self.parse_struct_expr(lo, pth, ThinVec::new())? + self.parse_struct_expr(lo, path, ThinVec::new())? } else { let hi = self.prev_span; - self.mk_expr(lo.to(hi), ExprKind::Path(None, pth), ThinVec::new()) + self.mk_expr(lo.to(hi), ExprKind::Path(None, path), ThinVec::new()) }; let expr = self.with_res(Restrictions::STMT_EXPR, |this| { @@ -5192,34 +4409,6 @@ impl<'a> Parser<'a> { })); } - // it's a macro invocation - let id = match self.token { - token::OpenDelim(_) => keywords::Invalid.ident(), // no special identifier - _ => self.parse_ident()?, - }; - - // check that we're pointing at delimiters (need to check - // again after the `if`, because of `parse_ident` - // consuming more tokens). - match self.token { - token::OpenDelim(_) => {} - _ => { - // we only expect an ident if we didn't parse one - // above. - let ident_str = if id.name == keywords::Invalid.name() { - "identifier, " - } else { - "" - }; - let tok_str = self.this_token_descr(); - let mut err = self.fatal(&format!("expected {}`(` or `{{`, found {}", - ident_str, - tok_str)); - err.span_label(self.span, format!("expected {}`(` or `{{`", ident_str)); - return Err(err) - }, - } - let (delim, tts) = self.expect_delimited_token_tree()?; let hi = self.prev_span; @@ -5229,61 +4418,43 @@ impl<'a> Parser<'a> { MacStmtStyle::NoBraces }; - if id.name == keywords::Invalid.name() { - let mac = respan(lo.to(hi), Mac_ { path: pth, tts, delim }); - let node = if delim == MacDelimiter::Brace || - self.token == token::Semi || self.token == token::Eof { - StmtKind::Mac(P((mac, style, attrs.into()))) - } - // We used to incorrectly stop parsing macro-expanded statements here. - // If the next token will be an error anyway but could have parsed with the - // earlier behavior, stop parsing here and emit a warning to avoid breakage. - else if macro_legacy_warnings && self.token.can_begin_expr() && match self.token { - // These can continue an expression, so we can't stop parsing and warn. - token::OpenDelim(token::Paren) | token::OpenDelim(token::Bracket) | - token::BinOp(token::Minus) | token::BinOp(token::Star) | - token::BinOp(token::And) | token::BinOp(token::Or) | - token::AndAnd | token::OrOr | - token::DotDot | token::DotDotDot | token::DotDotEq => false, - _ => true, - } { - self.warn_missing_semicolon(); - StmtKind::Mac(P((mac, style, attrs.into()))) - } else { - let e = self.mk_mac_expr(lo.to(hi), mac.node, ThinVec::new()); - let e = self.parse_dot_or_call_expr_with(e, lo, attrs.into())?; - let e = self.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(e))?; - StmtKind::Expr(e) - }; - Stmt { - id: ast::DUMMY_NODE_ID, - span: lo.to(hi), - node, - } + let mac = respan(lo.to(hi), Mac_ { + path, + tts, + delim, + prior_type_ascription: self.last_type_ascription, + }); + let node = if delim == MacDelimiter::Brace || + self.token == token::Semi || self.token == token::Eof { + StmtKind::Mac(P((mac, style, attrs.into()))) + } + // We used to incorrectly stop parsing macro-expanded statements here. + // If the next token will be an error anyway but could have parsed with the + // earlier behavior, stop parsing here and emit a warning to avoid breakage. + else if macro_legacy_warnings && + self.token.can_begin_expr() && + match self.token.kind { + // These can continue an expression, so we can't stop parsing and warn. + token::OpenDelim(token::Paren) | token::OpenDelim(token::Bracket) | + token::BinOp(token::Minus) | token::BinOp(token::Star) | + token::BinOp(token::And) | token::BinOp(token::Or) | + token::AndAnd | token::OrOr | + token::DotDot | token::DotDotDot | token::DotDotEq => false, + _ => true, + } { + self.warn_missing_semicolon(); + StmtKind::Mac(P((mac, style, attrs.into()))) } else { - // if it has a special ident, it's definitely an item - // - // Require a semicolon or braces. - if style != MacStmtStyle::Braces { - if !self.eat(&token::Semi) { - self.span_err(self.prev_span, - "macros that expand to items must \ - either be surrounded with braces or \ - followed by a semicolon"); - } - } - let span = lo.to(hi); - Stmt { - id: ast::DUMMY_NODE_ID, - span, - node: StmtKind::Item({ - self.mk_item( - span, id /*id is good here*/, - ItemKind::Mac(respan(span, Mac_ { path: pth, tts, delim })), - respan(lo, VisibilityKind::Inherited), - attrs) - }), - } + let e = self.mk_expr(mac.span, ExprKind::Mac(mac), ThinVec::new()); + let e = self.maybe_recover_from_bad_qpath(e, true)?; + let e = self.parse_dot_or_call_expr_with(e, lo, attrs.into())?; + let e = self.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(e))?; + StmtKind::Expr(e) + }; + Stmt { + id: ast::DUMMY_NODE_ID, + span: lo.to(hi), + node, } } else { // FIXME: Bad copy of attrs @@ -5304,7 +4475,9 @@ impl<'a> Parser<'a> { if s.prev_token_kind == PrevTokenKind::DocComment { s.span_fatal_err(s.prev_span, Error::UselessDocComment).emit(); } else if attrs.iter().any(|a| a.style == AttrStyle::Outer) { - s.span_err(s.span, "expected statement after outer attribute"); + s.span_err( + s.token.span, "expected statement after outer attribute" + ); } } }; @@ -5335,7 +4508,7 @@ impl<'a> Parser<'a> { } /// Checks if this expression is a successfully parsed statement. - fn expr_is_complete(&mut self, e: &Expr) -> bool { + fn expr_is_complete(&self, e: &Expr) -> bool { self.restrictions.contains(Restrictions::STMT_EXPR) && !classify::expr_requires_semi_to_be_stmt(e) } @@ -5344,26 +4517,26 @@ impl<'a> Parser<'a> { pub fn parse_block(&mut self) -> PResult<'a, P<Block>> { maybe_whole!(self, NtBlock, |x| x); - let lo = self.span; + let lo = self.token.span; if !self.eat(&token::OpenDelim(token::Brace)) { - let sp = self.span; + let sp = self.token.span; let tok = self.this_token_descr(); let mut e = self.span_fatal(sp, &format!("expected `{{`, found {}", tok)); let do_not_suggest_help = - self.token.is_keyword(keywords::In) || self.token == token::Colon; + self.token.is_keyword(kw::In) || self.token == token::Colon; - if self.token.is_ident_named("and") { + if self.token.is_ident_named(sym::and) { e.span_suggestion_short( - self.span, + self.token.span, "use `&&` instead of `and` for the boolean operator", "&&".to_string(), Applicability::MaybeIncorrect, ); } - if self.token.is_ident_named("or") { + if self.token.is_ident_named(sym::or) { e.span_suggestion_short( - self.span, + self.token.span, "use `||` instead of `or` for the boolean operator", "||".to_string(), Applicability::MaybeIncorrect, @@ -5390,20 +4563,15 @@ impl<'a> Parser<'a> { if self.eat(&token::Semi) { stmt_span = stmt_span.with_hi(self.prev_span.hi()); } - let sugg = pprust::to_string(|s| { - use crate::print::pprust::{PrintState, INDENT_UNIT}; - s.ibox(INDENT_UNIT)?; - s.bopen()?; - s.print_stmt(&stmt)?; - s.bclose_maybe_open(stmt.span, INDENT_UNIT, false) - }); - e.span_suggestion( - stmt_span, - "try placing this code inside a block", - sugg, - // speculative, has been misleading in the past (closed Issue #46836) - Applicability::MaybeIncorrect - ); + if let Ok(snippet) = self.span_to_snippet(stmt_span) { + e.span_suggestion( + stmt_span, + "try placing this code inside a block", + format!("{{ {} }}", snippet), + // speculative, has been misleading in the past (#46836) + Applicability::MaybeIncorrect, + ); + } } Err(mut e) => { self.recover_stmt_(SemiColonMode::Break, BlockMode::Ignore); @@ -5419,10 +4587,10 @@ impl<'a> Parser<'a> { } /// Parses a block. Inner attributes are allowed. - fn parse_inner_attrs_and_block(&mut self) -> PResult<'a, (Vec<Attribute>, P<Block>)> { + crate fn parse_inner_attrs_and_block(&mut self) -> PResult<'a, (Vec<Attribute>, P<Block>)> { maybe_whole!(self, NtBlock, |x| (Vec::new(), x)); - let lo = self.span; + let lo = self.token.span; self.expect(&token::OpenDelim(token::Brace))?; Ok((self.parse_inner_attributes()?, self.parse_block_tail(lo, BlockCheckMode::Default)?)) @@ -5433,22 +4601,23 @@ impl<'a> Parser<'a> { fn parse_block_tail(&mut self, lo: Span, s: BlockCheckMode) -> PResult<'a, P<Block>> { let mut stmts = vec![]; while !self.eat(&token::CloseDelim(token::Brace)) { + if self.token == token::Eof { + break; + } let stmt = match self.parse_full_stmt(false) { Err(mut err) => { err.emit(); self.recover_stmt_(SemiColonMode::Ignore, BlockMode::Ignore); Some(Stmt { id: ast::DUMMY_NODE_ID, - node: StmtKind::Expr(DummyResult::raw_expr(self.span, true)), - span: self.span, + node: StmtKind::Expr(DummyResult::raw_expr(self.token.span, true)), + span: self.token.span, }) } Ok(stmt) => stmt, }; if let Some(stmt) = stmt { stmts.push(stmt); - } else if self.token == token::Eof { - break; } else { // Found only `;` or `}`. continue; @@ -5482,6 +4651,9 @@ impl<'a> Parser<'a> { { e.emit(); self.recover_stmt(); + // Don't complain about type errors in body tail after parse error (#57383). + let sp = expr.span.to(self.prev_span); + stmt.node = StmtKind::Expr(DummyResult::raw_expr(sp, true)); } } } @@ -5499,13 +4671,12 @@ impl<'a> Parser<'a> { if self.eat(&token::Semi) { stmt = stmt.add_trailing_semicolon(); } - - stmt.span = stmt.span.with_hi(self.prev_span.hi()); + stmt.span = stmt.span.to(self.prev_span); Ok(Some(stmt)) } fn warn_missing_semicolon(&self) { - self.diagnostic().struct_span_warn(self.span, { + self.diagnostic().struct_span_warn(self.token.span, { &format!("expected `;`, found {}", self.this_token_descr()) }).note({ "This was erroneously allowed and will become a hard error in a future release" @@ -5540,16 +4711,16 @@ impl<'a> Parser<'a> { let mut last_plus_span = None; let mut was_negative = false; loop { - // This needs to be synchronized with `Token::can_begin_bound`. + // This needs to be synchronized with `TokenKind::can_begin_bound`. let is_bound_start = self.check_path() || self.check_lifetime() || self.check(&token::Not) || // used for error reporting only self.check(&token::Question) || - self.check_keyword(keywords::For) || + self.check_keyword(kw::For) || self.check(&token::OpenDelim(token::Paren)); if is_bound_start { - let lo = self.span; + let lo = self.token.span; let has_parens = self.eat(&token::OpenDelim(token::Paren)); - let inner_lo = self.span; + let inner_lo = self.token.span; let is_negative = self.eat(&token::Not); let question = if self.eat(&token::Question) { Some(self.prev_span) } else { None }; if self.token.is_lifetime() { @@ -5565,7 +4736,7 @@ impl<'a> Parser<'a> { lo.to(self.prev_span), "parenthesized lifetime bounds are not supported" ); - if let Ok(snippet) = self.sess.source_map().span_to_snippet(inner_span) { + if let Ok(snippet) = self.span_to_snippet(inner_span) { err.span_suggestion_short( lo.to(self.prev_span), "remove the parentheses", @@ -5610,25 +4781,32 @@ impl<'a> Parser<'a> { if !negative_bounds.is_empty() || was_negative { let plural = negative_bounds.len() > 1; - let mut err = self.struct_span_err(negative_bounds, - "negative trait bounds are not supported"); + let last_span = negative_bounds.last().map(|sp| *sp); + let mut err = self.struct_span_err( + negative_bounds, + "negative trait bounds are not supported", + ); + if let Some(sp) = last_span { + err.span_label(sp, "negative trait bounds are not supported"); + } if let Some(bound_list) = colon_span { let bound_list = bound_list.to(self.prev_span); let mut new_bound_list = String::new(); if !bounds.is_empty() { let mut snippets = bounds.iter().map(|bound| bound.span()) - .map(|span| self.sess.source_map().span_to_snippet(span)); + .map(|span| self.span_to_snippet(span)); while let Some(Ok(snippet)) = snippets.next() { new_bound_list.push_str(" + "); new_bound_list.push_str(&snippet); } new_bound_list = new_bound_list.replacen(" +", ":", 1); } - err.span_suggestion_short(bound_list, - &format!("remove the trait bound{}", - if plural { "s" } else { "" }), - new_bound_list, - Applicability::MachineApplicable); + err.span_suggestion_hidden( + bound_list, + &format!("remove the trait bound{}", if plural { "s" } else { "" }), + new_bound_list, + Applicability::MachineApplicable, + ); } err.emit(); } @@ -5636,7 +4814,8 @@ impl<'a> Parser<'a> { return Ok(bounds); } - fn parse_generic_bounds(&mut self, colon_span: Option<Span>) -> PResult<'a, GenericBounds> { + crate fn parse_generic_bounds(&mut self, + colon_span: Option<Span>) -> PResult<'a, GenericBounds> { self.parse_generic_bounds_common(true, colon_span) } @@ -5714,7 +4893,7 @@ impl<'a> Parser<'a> { } fn parse_const_param(&mut self, preceding_attrs: Vec<Attribute>) -> PResult<'a, GenericParam> { - self.expect_keyword(keywords::Const)?; + self.expect_keyword(kw::Const)?; let ident = self.parse_ident()?; self.expect(&token::Colon)?; let ty = self.parse_ty()?; @@ -5751,7 +4930,7 @@ impl<'a> Parser<'a> { bounds, kind: ast::GenericParamKind::Lifetime, }); - } else if self.check_keyword(keywords::Const) { + } else if self.check_keyword(kw::Const) { // Parse const parameter. params.push(self.parse_const_param(attrs)?); } else if self.check_ident() { @@ -5797,24 +4976,22 @@ impl<'a> Parser<'a> { /// | ( < lifetimes , typaramseq ( , )? > ) /// where typaramseq = ( typaram ) | ( typaram , typaramseq ) fn parse_generics(&mut self) -> PResult<'a, ast::Generics> { - maybe_whole!(self, NtGenerics, |x| x); - - let span_lo = self.span; - if self.eat_lt() { + let span_lo = self.token.span; + let (params, span) = if self.eat_lt() { let params = self.parse_generic_params()?; self.expect_gt()?; - Ok(ast::Generics { - params, - where_clause: WhereClause { - id: ast::DUMMY_NODE_ID, - predicates: Vec::new(), - span: syntax_pos::DUMMY_SP, - }, - span: span_lo.to(self.prev_span), - }) + (params, span_lo.to(self.prev_span)) } else { - Ok(ast::Generics::default()) - } + (vec![], self.prev_span.between(self.token.span)) + }; + Ok(ast::Generics { + params, + where_clause: WhereClause { + predicates: Vec::new(), + span: DUMMY_SP, + }, + span, + }) } /// Parses generic args (within a path segment) with recovery for extra leading angle brackets. @@ -5830,7 +5007,7 @@ impl<'a> Parser<'a> { &mut self, style: PathStyle, lo: Span, - ) -> PResult<'a, (Vec<GenericArg>, Vec<TypeBinding>)> { + ) -> PResult<'a, (Vec<GenericArg>, Vec<AssocTyConstraint>)> { // We need to detect whether there are extra leading left angle brackets and produce an // appropriate error and suggestion. This cannot be implemented by looking ahead at // upcoming tokens for a matching `>` character - if there are unmatched `<` tokens @@ -5965,51 +5142,61 @@ impl<'a> Parser<'a> { /// Parses (possibly empty) list of lifetime and type arguments and associated type bindings, /// possibly including trailing comma. - fn parse_generic_args(&mut self) -> PResult<'a, (Vec<GenericArg>, Vec<TypeBinding>)> { + fn parse_generic_args(&mut self) -> PResult<'a, (Vec<GenericArg>, Vec<AssocTyConstraint>)> { let mut args = Vec::new(); - let mut bindings = Vec::new(); - let mut misplaced_assoc_ty_bindings: Vec<Span> = Vec::new(); - let mut assoc_ty_bindings: Vec<Span> = Vec::new(); + let mut constraints = Vec::new(); + let mut misplaced_assoc_ty_constraints: Vec<Span> = Vec::new(); + let mut assoc_ty_constraints: Vec<Span> = Vec::new(); - let args_lo = self.span; + let args_lo = self.token.span; loop { if self.check_lifetime() && self.look_ahead(1, |t| !t.is_like_plus()) { // Parse lifetime argument. args.push(GenericArg::Lifetime(self.expect_lifetime())); - misplaced_assoc_ty_bindings.append(&mut assoc_ty_bindings); - } else if self.check_ident() && self.look_ahead(1, |t| t == &token::Eq) { - // Parse associated type binding. - let lo = self.span; + misplaced_assoc_ty_constraints.append(&mut assoc_ty_constraints); + } else if self.check_ident() && self.look_ahead(1, + |t| t == &token::Eq || t == &token::Colon) { + // Parse associated type constraint. + let lo = self.token.span; let ident = self.parse_ident()?; - self.bump(); - let ty = self.parse_ty()?; + let kind = if self.eat(&token::Eq) { + AssocTyConstraintKind::Equality { + ty: self.parse_ty()?, + } + } else if self.eat(&token::Colon) { + AssocTyConstraintKind::Bound { + bounds: self.parse_generic_bounds(Some(self.prev_span))?, + } + } else { + unreachable!(); + }; let span = lo.to(self.prev_span); - bindings.push(TypeBinding { + constraints.push(AssocTyConstraint { id: ast::DUMMY_NODE_ID, ident, - ty, + kind, span, }); - assoc_ty_bindings.push(span); + assoc_ty_constraints.push(span); } else if self.check_const_arg() { - // FIXME(const_generics): to distinguish between idents for types and consts, - // we should introduce a GenericArg::Ident in the AST and distinguish when - // lowering to the HIR. For now, idents for const args are not permitted. - // Parse const argument. - let expr = if let token::OpenDelim(token::Brace) = self.token { - self.parse_block_expr(None, self.span, BlockCheckMode::Default, ThinVec::new())? + let expr = if let token::OpenDelim(token::Brace) = self.token.kind { + self.parse_block_expr( + None, self.token.span, BlockCheckMode::Default, ThinVec::new() + )? } else if self.token.is_ident() { // FIXME(const_generics): to distinguish between idents for types and consts, // we should introduce a GenericArg::Ident in the AST and distinguish when // lowering to the HIR. For now, idents for const args are not permitted. - return Err( - self.fatal("identifiers may currently not be used for const generics") - ); + if self.token.is_keyword(kw::True) || self.token.is_keyword(kw::False) { + self.parse_literal_maybe_minus()? + } else { + return Err( + self.fatal("identifiers may currently not be used for const generics") + ); + } } else { - // FIXME(const_generics): this currently conflicts with emplacement syntax - // with negative integer literals. self.parse_literal_maybe_minus()? }; let value = AnonConst { @@ -6017,11 +5204,11 @@ impl<'a> Parser<'a> { value: expr, }; args.push(GenericArg::Const(value)); - misplaced_assoc_ty_bindings.append(&mut assoc_ty_bindings); + misplaced_assoc_ty_constraints.append(&mut assoc_ty_constraints); } else if self.check_type() { // Parse type argument. args.push(GenericArg::Type(self.parse_ty()?)); - misplaced_assoc_ty_bindings.append(&mut assoc_ty_bindings); + misplaced_assoc_ty_constraints.append(&mut assoc_ty_constraints); } else { break } @@ -6034,12 +5221,12 @@ impl<'a> Parser<'a> { // FIXME: we would like to report this in ast_validation instead, but we currently do not // preserve ordering of generic parameters with respect to associated type binding, so we // lose that information after parsing. - if misplaced_assoc_ty_bindings.len() > 0 { + if misplaced_assoc_ty_constraints.len() > 0 { let mut err = self.struct_span_err( args_lo.to(self.prev_span), "associated type bindings must be declared after generic parameters", ); - for span in misplaced_assoc_ty_bindings { + for span in misplaced_assoc_ty_constraints { err.span_label( span, "this associated type binding should be moved after the generic parameters", @@ -6048,7 +5235,7 @@ impl<'a> Parser<'a> { err.emit(); } - Ok((args, bindings)) + Ok((args, constraints)) } /// Parses an optional where-clause and places it in `generics`. @@ -6057,15 +5244,12 @@ impl<'a> Parser<'a> { /// where T : Trait<U, V> + 'b, 'a : 'b /// ``` fn parse_where_clause(&mut self) -> PResult<'a, WhereClause> { - maybe_whole!(self, NtWhereClause, |x| x); - let mut where_clause = WhereClause { - id: ast::DUMMY_NODE_ID, predicates: Vec::new(), - span: syntax_pos::DUMMY_SP, + span: self.prev_span.to(self.prev_span), }; - if !self.eat_keyword(keywords::Where) { + if !self.eat_keyword(kw::Where) { return Ok(where_clause); } let lo = self.prev_span; @@ -6084,7 +5268,7 @@ impl<'a> Parser<'a> { } loop { - let lo = self.span; + let lo = self.token.span; if self.check_lifetime() && self.look_ahead(1, |t| !t.is_like_plus()) { let lifetime = self.expect_lifetime(); // Bounds starting with a colon are mandatory, but possibly empty. @@ -6101,9 +5285,10 @@ impl<'a> Parser<'a> { // Parse optional `for<'a, 'b>`. // This `for` is parsed greedily and applies to the whole predicate, // the bounded type can have its own `for` applying only to it. - // Example 1: for<'a> Trait1<'a>: Trait2<'a /*ok*/> - // Example 2: (for<'a> Trait1<'a>): Trait2<'a /*not ok*/> - // Example 3: for<'a> for<'b> Trait1<'a, 'b>: Trait2<'a /*ok*/, 'b /*not ok*/> + // Examples: + // * `for<'a> Trait1<'a>: Trait2<'a /* ok */>` + // * `(for<'a> Trait1<'a>): Trait2<'a /* not ok */>` + // * `for<'a> for<'b> Trait1<'a, 'b>: Trait2<'a /* ok */, 'b /* not ok */>` let lifetime_defs = self.parse_late_bound_lifetime_defs()?; // Parse type with mandatory colon and (possibly empty) bounds, @@ -6149,55 +5334,48 @@ impl<'a> Parser<'a> { fn parse_fn_args(&mut self, named_args: bool, allow_c_variadic: bool) -> PResult<'a, (Vec<Arg> , bool)> { - self.expect(&token::OpenDelim(token::Paren))?; - - let sp = self.span; + let sp = self.token.span; let mut c_variadic = false; - let (args, recovered): (Vec<Option<Arg>>, bool) = - self.parse_seq_to_before_end( - &token::CloseDelim(token::Paren), - SeqSep::trailing_allowed(token::Comma), - |p| { - // If the argument is a C-variadic argument we should not - // enforce named arguments. - let enforce_named_args = if p.token == token::DotDotDot { + let (args, _): (Vec<Option<Arg>>, _) = self.parse_paren_comma_seq(|p| { + let do_not_enforce_named_arguments_for_c_variadic = + |token: &token::Token| -> bool { + if token == &token::DotDotDot { false } else { named_args - }; - match p.parse_arg_general(enforce_named_args, false, - allow_c_variadic) { - Ok(arg) => { - if let TyKind::CVarArgs = arg.ty.node { - c_variadic = true; - if p.token != token::CloseDelim(token::Paren) { - let span = p.span; - p.span_err(span, - "`...` must be the last argument of a C-variadic function"); - Ok(None) - } else { - Ok(Some(arg)) - } - } else { - Ok(Some(arg)) - } - }, - Err(mut e) => { - e.emit(); - let lo = p.prev_span; - // Skip every token until next possible arg or end. - p.eat_to_tokens(&[&token::Comma, &token::CloseDelim(token::Paren)]); - // Create a placeholder argument for proper arg count (issue #34264). - let span = lo.to(p.prev_span); - Ok(Some(dummy_arg(span))) + } + }; + match p.parse_arg_general( + false, + allow_c_variadic, + do_not_enforce_named_arguments_for_c_variadic + ) { + Ok(arg) => { + if let TyKind::CVarArgs = arg.ty.node { + c_variadic = true; + if p.token != token::CloseDelim(token::Paren) { + let span = p.token.span; + p.span_err(span, + "`...` must be the last argument of a C-variadic function"); + Ok(None) + } else { + Ok(Some(arg)) } + } else { + Ok(Some(arg)) } + }, + Err(mut e) => { + e.emit(); + let lo = p.prev_span; + // Skip every token until next possible arg or end. + p.eat_to_tokens(&[&token::Comma, &token::CloseDelim(token::Paren)]); + // Create a placeholder argument for proper arg count (issue #34264). + let span = lo.to(p.prev_span); + Ok(Some(dummy_arg(Ident::new(kw::Invalid, span)))) } - )?; - - if !recovered { - self.eat(&token::CloseDelim(token::Paren)); - } + } + })?; let args: Vec<_> = args.into_iter().filter_map(|x| x).collect(); @@ -6211,7 +5389,6 @@ impl<'a> Parser<'a> { /// Parses the argument list and result type of a function declaration. fn parse_fn_decl(&mut self, allow_c_variadic: bool) -> PResult<'a, P<FnDecl>> { - let (args, c_variadic) = self.parse_fn_args(true, allow_c_variadic)?; let ret_ty = self.parse_ret_ty(true)?; @@ -6223,33 +5400,35 @@ impl<'a> Parser<'a> { } /// Returns the parsed optional self argument and whether a self shortcut was used. + /// + /// See `parse_self_arg_with_attrs` to collect attributes. fn parse_self_arg(&mut self) -> PResult<'a, Option<Arg>> { - let expect_ident = |this: &mut Self| match this.token { + let expect_ident = |this: &mut Self| match this.token.kind { // Preserve hygienic context. - token::Ident(ident, _) => - { let span = this.span; this.bump(); Ident::new(ident.name, span) } + token::Ident(name, _) => + { let span = this.token.span; this.bump(); Ident::new(name, span) } _ => unreachable!() }; let isolated_self = |this: &mut Self, n| { - this.look_ahead(n, |t| t.is_keyword(keywords::SelfLower)) && + this.look_ahead(n, |t| t.is_keyword(kw::SelfLower)) && this.look_ahead(n + 1, |t| t != &token::ModSep) }; - // Parse optional self parameter of a method. - // Only a limited set of initial token sequences is considered self parameters, anything + // Parse optional `self` parameter of a method. + // Only a limited set of initial token sequences is considered `self` parameters; anything // else is parsed as a normal function parameter list, so some lookahead is required. - let eself_lo = self.span; - let (eself, eself_ident, eself_hi) = match self.token { + let eself_lo = self.token.span; + let (eself, eself_ident, eself_hi) = match self.token.kind { token::BinOp(token::And) => { - // &self - // &mut self - // &'lt self - // &'lt mut self - // ¬_self + // `&self` + // `&mut self` + // `&'lt self` + // `&'lt mut self` + // `¬_self` (if isolated_self(self, 1) { self.bump(); SelfKind::Region(None, Mutability::Immutable) - } else if self.look_ahead(1, |t| t.is_keyword(keywords::Mut)) && + } else if self.is_keyword_ahead(1, &[kw::Mut]) && isolated_self(self, 2) { self.bump(); self.bump(); @@ -6260,7 +5439,7 @@ impl<'a> Parser<'a> { let lt = self.expect_lifetime(); SelfKind::Region(Some(lt), Mutability::Immutable) } else if self.look_ahead(1, |t| t.is_lifetime()) && - self.look_ahead(2, |t| t.is_keyword(keywords::Mut)) && + self.is_keyword_ahead(2, &[kw::Mut]) && isolated_self(self, 3) { self.bump(); let lt = self.expect_lifetime(); @@ -6271,24 +5450,24 @@ impl<'a> Parser<'a> { }, expect_ident(self), self.prev_span) } token::BinOp(token::Star) => { - // *self - // *const self - // *mut self - // *not_self + // `*self` + // `*const self` + // `*mut self` + // `*not_self` // Emit special error for `self` cases. let msg = "cannot pass `self` by raw pointer"; (if isolated_self(self, 1) { self.bump(); - self.struct_span_err(self.span, msg) - .span_label(self.span, msg) + self.struct_span_err(self.token.span, msg) + .span_label(self.token.span, msg) .emit(); SelfKind::Value(Mutability::Immutable) } else if self.look_ahead(1, |t| t.is_mutability()) && isolated_self(self, 2) { self.bump(); self.bump(); - self.struct_span_err(self.span, msg) - .span_label(self.span, msg) + self.struct_span_err(self.token.span, msg) + .span_label(self.token.span, msg) .emit(); SelfKind::Value(Mutability::Immutable) } else { @@ -6297,8 +5476,8 @@ impl<'a> Parser<'a> { } token::Ident(..) => { if isolated_self(self, 0) { - // self - // self: TYPE + // `self` + // `self: TYPE` let eself_ident = expect_ident(self); let eself_hi = self.prev_span; (if self.eat(&token::Colon) { @@ -6307,10 +5486,10 @@ impl<'a> Parser<'a> { } else { SelfKind::Value(Mutability::Immutable) }, eself_ident, eself_hi) - } else if self.token.is_keyword(keywords::Mut) && + } else if self.token.is_keyword(kw::Mut) && isolated_self(self, 1) { - // mut self - // mut self: TYPE + // `mut self` + // `mut self: TYPE` self.bump(); let eself_ident = expect_ident(self); let eself_hi = self.prev_span; @@ -6328,7 +5507,18 @@ impl<'a> Parser<'a> { }; let eself = source_map::respan(eself_lo.to(eself_hi), eself); - Ok(Some(Arg::from_self(eself, eself_ident))) + Ok(Some(Arg::from_self(ThinVec::default(), eself, eself_ident))) + } + + /// Returns the parsed optional self argument with attributes and whether a self + /// shortcut was used. + fn parse_self_arg_with_attrs(&mut self) -> PResult<'a, Option<Arg>> { + let attrs = self.parse_arg_attributes()?; + let arg_opt = self.parse_self_arg()?; + Ok(arg_opt.map(|mut arg| { + arg.attrs = attrs.into(); + arg + })) } /// Parses the parameter list and result type of a function that may have a `self` parameter. @@ -6337,17 +5527,17 @@ impl<'a> Parser<'a> { { self.expect(&token::OpenDelim(token::Paren))?; - // Parse optional self argument - let self_arg = self.parse_self_arg()?; + // Parse optional self argument. + let self_arg = self.parse_self_arg_with_attrs()?; // Parse the rest of the function parameter list. let sep = SeqSep::trailing_allowed(token::Comma); - let (fn_inputs, recovered) = if let Some(self_arg) = self_arg { + let (mut fn_inputs, recovered) = if let Some(self_arg) = self_arg { if self.check(&token::CloseDelim(token::Paren)) { (vec![self_arg], false) } else if self.eat(&token::Comma) { let mut fn_inputs = vec![self_arg]; - let (mut input, recovered) = self.parse_seq_to_before_end( + let (mut input, _, recovered) = self.parse_seq_to_before_end( &token::CloseDelim(token::Paren), sep, parse_arg_fn)?; fn_inputs.append(&mut input); (fn_inputs, recovered) @@ -6358,13 +5548,18 @@ impl<'a> Parser<'a> { } } } else { - self.parse_seq_to_before_end(&token::CloseDelim(token::Paren), sep, parse_arg_fn)? + let (input, _, recovered) = + self.parse_seq_to_before_end(&token::CloseDelim(token::Paren), sep, parse_arg_fn)?; + (input, recovered) }; if !recovered { // Parse closing paren and return type. self.expect(&token::CloseDelim(token::Paren))?; } + // Replace duplicated recovered arguments with `_` pattern to avoid unecessary errors. + self.deduplicate_recovered_arg_names(&mut fn_inputs); + Ok(P(FnDecl { inputs: fn_inputs, output: self.parse_ret_ty(true)?, @@ -6405,7 +5600,7 @@ impl<'a> Parser<'a> { Ok((id, generics)) } - fn mk_item(&mut self, span: Span, ident: Ident, node: ItemKind, vis: Visibility, + fn mk_item(&self, span: Span, ident: Ident, node: ItemKind, vis: Visibility, attrs: Vec<Attribute>) -> P<Item> { P(Item { ident, @@ -6436,10 +5631,9 @@ impl<'a> Parser<'a> { /// Returns `true` if we are looking at `const ID` /// (returns `false` for things like `const fn`, etc.). - fn is_const_item(&mut self) -> bool { - self.token.is_keyword(keywords::Const) && - !self.look_ahead(1, |t| t.is_keyword(keywords::Fn)) && - !self.look_ahead(1, |t| t.is_keyword(keywords::Unsafe)) + fn is_const_item(&self) -> bool { + self.token.is_keyword(kw::Const) && + !self.is_keyword_ahead(1, &[kw::Fn, kw::Unsafe]) } /// Parses all the "front matter" for a `fn` declaration, up to @@ -6458,22 +5652,30 @@ impl<'a> Parser<'a> { Abi )> { - let is_const_fn = self.eat_keyword(keywords::Const); + let is_const_fn = self.eat_keyword(kw::Const); let const_span = self.prev_span; - let unsafety = self.parse_unsafety(); let asyncness = self.parse_asyncness(); + if let IsAsync::Async { .. } = asyncness { + self.ban_async_in_2015(self.prev_span); + } let asyncness = respan(self.prev_span, asyncness); + let unsafety = self.parse_unsafety(); let (constness, unsafety, abi) = if is_const_fn { (respan(const_span, Constness::Const), unsafety, Abi::Rust) } else { - let abi = if self.eat_keyword(keywords::Extern) { + let abi = if self.eat_keyword(kw::Extern) { self.parse_opt_abi()?.unwrap_or(Abi::C) } else { Abi::Rust }; (respan(self.prev_span, Constness::NotConst), unsafety, abi) }; - self.expect_keyword(keywords::Fn)?; + if !self.eat_keyword(kw::Fn) { + // It is possible for `expect_one_of` to recover given the contents of + // `self.expected_tokens`, therefore, do not use `self.unexpected()` which doesn't + // account for this. + if !self.expect_one_of(&[], &[])? { unreachable!() } + } Ok((constness, unsafety, asyncness, abi)) } @@ -6499,20 +5701,20 @@ impl<'a> Parser<'a> { fn parse_impl_item_(&mut self, at_end: &mut bool, mut attrs: Vec<Attribute>) -> PResult<'a, ImplItem> { - let lo = self.span; + let lo = self.token.span; let vis = self.parse_visibility(false)?; let defaultness = self.parse_defaultness(); let (name, node, generics) = if let Some(type_) = self.eat_type() { let (name, alias, generics) = type_?; let kind = match alias { - AliasKind::Weak(typ) => ast::ImplItemKind::Type(typ), - AliasKind::Existential(bounds) => ast::ImplItemKind::Existential(bounds), + AliasKind::Weak(typ) => ast::ImplItemKind::TyAlias(typ), + AliasKind::OpaqueTy(bounds) => ast::ImplItemKind::OpaqueTy(bounds), }; (name, kind, generics) } else if self.is_const_item() { // This parses the grammar: // ImplItemConst = "const" Ident ":" Ty "=" Expr ";" - self.expect_keyword(keywords::Const)?; + self.expect_keyword(kw::Const)?; let name = self.parse_ident()?; self.expect(&token::Colon)?; let typ = self.parse_ty()?; @@ -6539,15 +5741,11 @@ impl<'a> Parser<'a> { }) } - fn complain_if_pub_macro(&mut self, vis: &VisibilityKind, sp: Span) { + fn complain_if_pub_macro(&self, vis: &VisibilityKind, sp: Span) { match *vis { VisibilityKind::Inherited => {} _ => { - let is_macro_rules: bool = match self.token { - token::Ident(sid, _) => sid.name == Symbol::intern("macro_rules"), - _ => false, - }; - let mut err = if is_macro_rules { + let mut err = if self.token.is_keyword(sym::macro_rules) { let mut err = self.diagnostic() .struct_span_err(sp, "can't qualify macro_rules invocation with `pub`"); err.span_suggestion( @@ -6568,7 +5766,7 @@ impl<'a> Parser<'a> { } } - fn missing_assoc_item_kind_err(&mut self, item_type: &str, prev_span: Span) + fn missing_assoc_item_kind_err(&self, item_type: &str, prev_span: Span) -> DiagnosticBuilder<'a> { let expected_kinds = if item_type == "extern" { @@ -6602,13 +5800,15 @@ impl<'a> Parser<'a> { // code copied from parse_macro_use_or_failure... abstraction! if let Some(mac) = self.parse_assoc_macro_invoc("impl", Some(vis), at_end)? { // method macro - Ok((keywords::Invalid.ident(), vec![], ast::Generics::default(), + Ok((Ident::invalid(), vec![], ast::Generics::default(), ast::ImplItemKind::Macro(mac))) } else { let (constness, unsafety, asyncness, abi) = self.parse_fn_front_matter()?; let ident = self.parse_ident()?; let mut generics = self.parse_generics()?; - let decl = self.parse_fn_decl_with_self(|p| p.parse_arg())?; + let decl = self.parse_fn_decl_with_self(|p| { + p.parse_arg_general(true, false, |_| true) + })?; generics.where_clause = self.parse_where_clause()?; *at_end = true; let (inner_attrs, body) = self.parse_inner_attrs_and_block()?; @@ -6656,6 +5856,23 @@ impl<'a> Parser<'a> { self.expect(&token::OpenDelim(token::Brace))?; let mut trait_items = vec![]; while !self.eat(&token::CloseDelim(token::Brace)) { + if let token::DocComment(_) = self.token.kind { + if self.look_ahead(1, + |tok| tok == &token::CloseDelim(token::Brace)) { + self.diagnostic().struct_span_err_with_code( + self.token.span, + "found a documentation comment that doesn't document anything", + DiagnosticId::Error("E0584".into()), + ) + .help( + "doc comments must come before what they document, maybe a \ + comment was intended with `//`?", + ) + .emit(); + self.bump(); + continue; + } + } let mut at_end = false; match self.parse_trait_item(&mut at_end) { Ok(item) => trait_items.push(item), @@ -6693,7 +5910,7 @@ impl<'a> Parser<'a> { self.look_ahead(1, |t| t.is_lifetime() || t.is_ident()) && self.look_ahead(2, |t| t == &token::Gt || t == &token::Comma || t == &token::Colon || t == &token::Eq) || - self.look_ahead(1, |t| t.is_keyword(keywords::Const))) + self.is_keyword_ahead(1, &[kw::Const])) } fn parse_impl_body(&mut self) -> PResult<'a, (Vec<ImplItem>, Vec<Attribute>)> { @@ -6743,16 +5960,24 @@ impl<'a> Parser<'a> { }; // Parse both types and traits as a type, then reinterpret if necessary. - let ty_first = self.parse_ty()?; + let err_path = |span| ast::Path::from_ident(Ident::new(kw::Invalid, span)); + let ty_first = if self.token.is_keyword(kw::For) && + self.look_ahead(1, |t| t != &token::Lt) { + let span = self.prev_span.between(self.token.span); + self.struct_span_err(span, "missing trait in a trait impl").emit(); + P(Ty { node: TyKind::Path(None, err_path(span)), span, id: ast::DUMMY_NODE_ID }) + } else { + self.parse_ty()? + }; // If `for` is missing we try to recover. - let has_for = self.eat_keyword(keywords::For); - let missing_for_span = self.prev_span.between(self.span); + let has_for = self.eat_keyword(kw::For); + let missing_for_span = self.prev_span.between(self.token.span); let ty_second = if self.token == token::DotDot { // We need to report this error after `cfg` expansion for compatibility reasons self.bump(); // `..`, do not add it to expected tokens - Some(P(Ty { node: TyKind::Err, span: self.prev_span, id: ast::DUMMY_NODE_ID })) + Some(DummyResult::raw_ty(self.prev_span, true)) } else if has_for || self.token.can_begin_type() { Some(self.parse_ty()?) } else { @@ -6782,7 +6007,7 @@ impl<'a> Parser<'a> { TyKind::Path(None, path) => path, _ => { self.span_err(ty_first.span, "expected a trait, found type"); - ast::Path::from_ident(Ident::new(keywords::Invalid.name(), ty_first.span)) + err_path(ty_first.span) } }; let trait_ref = TraitRef { path, ref_id: ty_first.id }; @@ -6797,11 +6022,11 @@ impl<'a> Parser<'a> { } }; - Ok((keywords::Invalid.ident(), item_kind, Some(attrs))) + Ok((Ident::invalid(), item_kind, Some(attrs))) } fn parse_late_bound_lifetime_defs(&mut self) -> PResult<'a, Vec<GenericParam>> { - if self.eat_keyword(keywords::For) { + if self.eat_keyword(kw::For) { self.expect_lt()?; let params = self.parse_generic_params()?; self.expect_gt()?; @@ -6833,21 +6058,23 @@ impl<'a> Parser<'a> { // Otherwise if we look ahead and see a paren we parse a tuple-style // struct. - let vdata = if self.token.is_keyword(keywords::Where) { + let vdata = if self.token.is_keyword(kw::Where) { generics.where_clause = self.parse_where_clause()?; if self.eat(&token::Semi) { // If we see a: `struct Foo<T> where T: Copy;` style decl. VariantData::Unit(ast::DUMMY_NODE_ID) } else { // If we see: `struct Foo<T> where T: Copy { ... }` - VariantData::Struct(self.parse_record_struct_body()?, ast::DUMMY_NODE_ID) + let (fields, recovered) = self.parse_record_struct_body()?; + VariantData::Struct(fields, recovered) } // No `where` so: `struct Foo<T>;` } else if self.eat(&token::Semi) { VariantData::Unit(ast::DUMMY_NODE_ID) // Record-style struct definition } else if self.token == token::OpenDelim(token::Brace) { - VariantData::Struct(self.parse_record_struct_body()?, ast::DUMMY_NODE_ID) + let (fields, recovered) = self.parse_record_struct_body()?; + VariantData::Struct(fields, recovered) // Tuple-style struct definition with optional where-clause. } else if self.token == token::OpenDelim(token::Paren) { let body = VariantData::Tuple(self.parse_tuple_struct_body()?, ast::DUMMY_NODE_ID); @@ -6860,7 +6087,7 @@ impl<'a> Parser<'a> { "expected `where`, `{{`, `(`, or `;` after struct name, found {}", token_str )); - err.span_label(self.span, "expected `where`, `{`, `(`, or `;` after struct name"); + err.span_label(self.token.span, "expected `where`, `{`, `(`, or `;` after struct name"); return Err(err); }; @@ -6873,48 +6100,34 @@ impl<'a> Parser<'a> { let mut generics = self.parse_generics()?; - let vdata = if self.token.is_keyword(keywords::Where) { + let vdata = if self.token.is_keyword(kw::Where) { generics.where_clause = self.parse_where_clause()?; - VariantData::Struct(self.parse_record_struct_body()?, ast::DUMMY_NODE_ID) + let (fields, recovered) = self.parse_record_struct_body()?; + VariantData::Struct(fields, recovered) } else if self.token == token::OpenDelim(token::Brace) { - VariantData::Struct(self.parse_record_struct_body()?, ast::DUMMY_NODE_ID) + let (fields, recovered) = self.parse_record_struct_body()?; + VariantData::Struct(fields, recovered) } else { let token_str = self.this_token_descr(); let mut err = self.fatal(&format!( "expected `where` or `{{` after union name, found {}", token_str)); - err.span_label(self.span, "expected `where` or `{` after union name"); + err.span_label(self.token.span, "expected `where` or `{` after union name"); return Err(err); }; Ok((class_name, ItemKind::Union(vdata, generics), None)) } - fn consume_block(&mut self, delim: token::DelimToken) { - let mut brace_depth = 0; - loop { - if self.eat(&token::OpenDelim(delim)) { - brace_depth += 1; - } else if self.eat(&token::CloseDelim(delim)) { - if brace_depth == 0 { - return; - } else { - brace_depth -= 1; - continue; - } - } else if self.token == token::Eof || self.eat(&token::CloseDelim(token::NoDelim)) { - return; - } else { - self.bump(); - } - } - } - - fn parse_record_struct_body(&mut self) -> PResult<'a, Vec<StructField>> { + fn parse_record_struct_body( + &mut self, + ) -> PResult<'a, (Vec<StructField>, /* recovered */ bool)> { let mut fields = Vec::new(); + let mut recovered = false; if self.eat(&token::OpenDelim(token::Brace)) { while self.token != token::CloseDelim(token::Brace) { let field = self.parse_struct_decl_field().map_err(|e| { self.recover_stmt(); + recovered = true; e }); match field { @@ -6929,36 +6142,30 @@ impl<'a> Parser<'a> { let token_str = self.this_token_descr(); let mut err = self.fatal(&format!( "expected `where`, or `{{` after struct name, found {}", token_str)); - err.span_label(self.span, "expected `where`, or `{` after struct name"); + err.span_label(self.token.span, "expected `where`, or `{` after struct name"); return Err(err); } - Ok(fields) + Ok((fields, recovered)) } fn parse_tuple_struct_body(&mut self) -> PResult<'a, Vec<StructField>> { // This is the case where we find `struct Foo<T>(T) where T: Copy;` // Unit like structs are handled in parse_item_struct function - let fields = self.parse_unspanned_seq( - &token::OpenDelim(token::Paren), - &token::CloseDelim(token::Paren), - SeqSep::trailing_allowed(token::Comma), - |p| { - let attrs = p.parse_outer_attributes()?; - let lo = p.span; - let vis = p.parse_visibility(true)?; - let ty = p.parse_ty()?; - Ok(StructField { - span: lo.to(ty.span), - vis, - ident: None, - id: ast::DUMMY_NODE_ID, - ty, - attrs, - }) - })?; - - Ok(fields) + self.parse_paren_comma_seq(|p| { + let attrs = p.parse_outer_attributes()?; + let lo = p.token.span; + let vis = p.parse_visibility(true)?; + let ty = p.parse_ty()?; + Ok(StructField { + span: lo.to(ty.span), + vis, + ident: None, + id: ast::DUMMY_NODE_ID, + ty, + attrs, + }) + }).map(|(r, _)| r) } /// Parses a structure field declaration. @@ -6972,14 +6179,14 @@ impl<'a> Parser<'a> { if self.token == token::Comma { seen_comma = true; } - match self.token { + match self.token.kind { token::Comma => { self.bump(); } token::CloseDelim(token::Brace) => {} token::DocComment(_) => { let previous_span = self.prev_span; - let mut err = self.span_fatal_err(self.span, Error::UselessDocComment); + let mut err = self.span_fatal_err(self.token.span, Error::UselessDocComment); self.bump(); // consume the doc comment let comma_after_doc_seen = self.eat(&token::Comma); // `seen_comma` is always false, because we are inside doc block @@ -7026,7 +6233,7 @@ impl<'a> Parser<'a> { /// Parses an element of a struct declaration. fn parse_struct_decl_field(&mut self) -> PResult<'a, StructField> { let attrs = self.parse_outer_attributes()?; - let lo = self.span; + let lo = self.token.span; let vis = self.parse_visibility(false)?; self.parse_single_struct_field(lo, vis, attrs) } @@ -7039,17 +6246,17 @@ impl<'a> Parser<'a> { pub fn parse_visibility(&mut self, can_take_tuple: bool) -> PResult<'a, Visibility> { maybe_whole!(self, NtVis, |x| x); - self.expected_tokens.push(TokenType::Keyword(keywords::Crate)); + self.expected_tokens.push(TokenType::Keyword(kw::Crate)); if self.is_crate_vis() { self.bump(); // `crate` return Ok(respan(self.prev_span, VisibilityKind::Crate(CrateSugar::JustCrate))); } - if !self.eat_keyword(keywords::Pub) { + if !self.eat_keyword(kw::Pub) { // We need a span for our `Spanned<VisibilityKind>`, but there's inherently no // keyword to grab a span from for inherited visibility; an empty span at the // beginning of the current token would seem to be the "Schelling span". - return Ok(respan(self.span.shrink_to_lo(), VisibilityKind::Inherited)) + return Ok(respan(self.token.span.shrink_to_lo(), VisibilityKind::Inherited)) } let lo = self.prev_span; @@ -7058,7 +6265,9 @@ impl<'a> Parser<'a> { // `()` or a tuple might be allowed. For example, `struct Struct(pub (), pub (usize));`. // Because of this, we only `bump` the `(` if we're assured it is appropriate to do so // by the following tokens. - if self.look_ahead(1, |t| t.is_keyword(keywords::Crate)) { + if self.is_keyword_ahead(1, &[kw::Crate]) && + self.look_ahead(2, |t| t != &token::ModSep) // account for `pub(crate::foo)` + { // `pub(crate)` self.bump(); // `(` self.bump(); // `crate` @@ -7068,7 +6277,7 @@ impl<'a> Parser<'a> { VisibilityKind::Crate(CrateSugar::PubCrate), ); return Ok(vis) - } else if self.look_ahead(1, |t| t.is_keyword(keywords::In)) { + } else if self.is_keyword_ahead(1, &[kw::In]) { // `pub(in path)` self.bump(); // `(` self.bump(); // `in` @@ -7080,8 +6289,7 @@ impl<'a> Parser<'a> { }); return Ok(vis) } else if self.look_ahead(2, |t| t == &token::CloseDelim(token::Paren)) && - self.look_ahead(1, |t| t.is_keyword(keywords::Super) || - t.is_keyword(keywords::SelfLower)) + self.is_keyword_ahead(1, &[kw::Super, kw::SelfLower]) { // `pub(self)` or `pub(super)` self.bump(); // `(` @@ -7101,15 +6309,18 @@ impl<'a> Parser<'a> { `pub(super)`: visible only in the current module's parent `pub(in path::to::module)`: visible only on the specified path"##; let path = self.parse_path(PathStyle::Mod)?; - let sp = self.prev_span; + let sp = path.span; let help_msg = format!("make this visible only to module `{}` with `in`", path); self.expect(&token::CloseDelim(token::Paren))?; // `)` - let mut err = struct_span_err!(self.sess.span_diagnostic, sp, E0704, "{}", msg); - err.help(suggestion); - err.span_suggestion( - sp, &help_msg, format!("in {}", path), Applicability::MachineApplicable - ); - err.emit(); // emit diagnostic, but continue with public visibility + struct_span_err!(self.sess.span_diagnostic, sp, E0704, "{}", msg) + .help(suggestion) + .span_suggestion( + sp, + &help_msg, + format!("in {}", path), + Applicability::MachineApplicable, + ) + .emit(); // emit diagnostic, but continue with public visibility } } @@ -7119,14 +6330,17 @@ impl<'a> Parser<'a> { /// Parses defaultness (i.e., `default` or nothing). fn parse_defaultness(&mut self) -> Defaultness { // `pub` is included for better error messages - if self.check_keyword(keywords::Default) && - self.look_ahead(1, |t| t.is_keyword(keywords::Impl) || - t.is_keyword(keywords::Const) || - t.is_keyword(keywords::Fn) || - t.is_keyword(keywords::Unsafe) || - t.is_keyword(keywords::Extern) || - t.is_keyword(keywords::Type) || - t.is_keyword(keywords::Pub)) { + if self.check_keyword(kw::Default) && + self.is_keyword_ahead(1, &[ + kw::Impl, + kw::Const, + kw::Fn, + kw::Unsafe, + kw::Extern, + kw::Type, + kw::Pub, + ]) + { self.bump(); // `default` Defaultness::Default } else { @@ -7134,39 +6348,8 @@ impl<'a> Parser<'a> { } } - fn maybe_consume_incorrect_semicolon(&mut self, items: &[P<Item>]) -> bool { - if self.eat(&token::Semi) { - let mut err = self.struct_span_err(self.prev_span, "expected item, found `;`"); - err.span_suggestion_short( - self.prev_span, - "remove this semicolon", - String::new(), - Applicability::MachineApplicable, - ); - if !items.is_empty() { - let previous_item = &items[items.len()-1]; - let previous_item_kind_name = match previous_item.node { - // say "braced struct" because tuple-structs and - // braceless-empty-struct declarations do take a semicolon - ItemKind::Struct(..) => Some("braced struct"), - ItemKind::Enum(..) => Some("enum"), - ItemKind::Trait(..) => Some("trait"), - ItemKind::Union(..) => Some("union"), - _ => None, - }; - if let Some(name) = previous_item_kind_name { - err.help(&format!("{} declarations are not followed by a semicolon", name)); - } - } - err.emit(); - true - } else { - false - } - } - /// Given a termination token, parses all of the items in a module. - fn parse_mod_items(&mut self, term: &token::Token, inner_lo: Span) -> PResult<'a, Mod> { + fn parse_mod_items(&mut self, term: &TokenKind, inner_lo: Span) -> PResult<'a, Mod> { let mut items = vec![]; while let Some(item) = self.parse_item()? { items.push(item); @@ -7177,12 +6360,12 @@ impl<'a> Parser<'a> { let token_str = self.this_token_descr(); if !self.maybe_consume_incorrect_semicolon(&items) { let mut err = self.fatal(&format!("expected item, found {}", token_str)); - err.span_label(self.span, "expected item"); + err.span_label(self.token.span, "expected item"); return Err(err); } } - let hi = if self.span.is_dummy() { + let hi = if self.token.span.is_dummy() { inner_lo } else { self.prev_span @@ -7221,7 +6404,7 @@ impl<'a> Parser<'a> { (!self.cfg_mods || strip_unconfigured.in_cfg(&outer_attrs), outer_attrs) }; - let id_span = self.span; + let id_span = self.token.span; let id = self.parse_ident()?; if self.eat(&token::Semi) { if in_cfg && self.recurse_into_file_modules { @@ -7232,21 +6415,15 @@ impl<'a> Parser<'a> { self.eval_src_mod(path, directory_ownership, id.to_string(), id_span)?; // Record that we fetched the mod from an external file if warn { - let attr = Attribute { - id: attr::mk_attr_id(), - style: ast::AttrStyle::Outer, - path: ast::Path::from_ident(Ident::from_str("warn_directory_ownership")), - tokens: TokenStream::empty(), - is_sugared_doc: false, - span: syntax_pos::DUMMY_SP, - }; + let attr = attr::mk_attr_outer( + attr::mk_word_item(Ident::with_empty_ctxt(sym::warn_directory_ownership))); attr::mark_known(&attr); attrs.push(attr); } Ok((id, ItemKind::Mod(module), Some(attrs))) } else { let placeholder = ast::Mod { - inner: syntax_pos::DUMMY_SP, + inner: DUMMY_SP, items: Vec::new(), inline: false }; @@ -7257,7 +6434,7 @@ impl<'a> Parser<'a> { self.push_directory(id, &outer_attrs); self.expect(&token::OpenDelim(token::Brace))?; - let mod_inner_lo = self.span; + let mod_inner_lo = self.token.span; let attrs = self.parse_inner_attributes()?; let module = self.parse_mod_items(&token::CloseDelim(token::Brace), mod_inner_lo)?; @@ -7267,7 +6444,7 @@ impl<'a> Parser<'a> { } fn push_directory(&mut self, id: Ident, attrs: &[Attribute]) { - if let Some(path) = attr::first_attr_value_str_by_name(attrs, "path") { + if let Some(path) = attr::first_attr_value_str_by_name(attrs, sym::path) { self.directory.path.to_mut().push(&path.as_str()); self.directory.ownership = DirectoryOwnership::Owned { relative: None }; } else { @@ -7287,7 +6464,7 @@ impl<'a> Parser<'a> { } pub fn submod_path_from_attr(attrs: &[Attribute], dir_path: &Path) -> Option<PathBuf> { - if let Some(s) = attr::first_attr_value_str_by_name(attrs, "path") { + if let Some(s) = attr::first_attr_value_str_by_name(attrs, sym::path) { let s = s.as_str(); // On windows, the base path might have the form @@ -7446,12 +6623,13 @@ impl<'a> Parser<'a> { } /// Reads a module from a source file. - fn eval_src_mod(&mut self, - path: PathBuf, - directory_ownership: DirectoryOwnership, - name: String, - id_sp: Span) - -> PResult<'a, (ast::Mod, Vec<Attribute> )> { + fn eval_src_mod( + &mut self, + path: PathBuf, + directory_ownership: DirectoryOwnership, + name: String, + id_sp: Span, + ) -> PResult<'a, (ast::Mod, Vec<Attribute>)> { let mut included_mod_stack = self.sess.included_mod_stack.borrow_mut(); if let Some(i) = included_mod_stack.iter().position(|p| *p == path) { let mut err = String::from("circular modules: "); @@ -7469,7 +6647,7 @@ impl<'a> Parser<'a> { let mut p0 = new_sub_parser_from_file(self.sess, &path, directory_ownership, Some(name), id_sp); p0.cfg_mods = self.cfg_mods; - let mod_inner_lo = p0.span; + let mod_inner_lo = p0.token.span; let mod_attrs = p0.parse_inner_attributes()?; let mut m0 = p0.parse_mod_items(&token::Eof, mod_inner_lo)?; m0.inline = false; @@ -7478,15 +6656,20 @@ impl<'a> Parser<'a> { } /// Parses a function declaration from a foreign module. - fn parse_item_foreign_fn(&mut self, vis: ast::Visibility, lo: Span, attrs: Vec<Attribute>) - -> PResult<'a, ForeignItem> { - self.expect_keyword(keywords::Fn)?; + fn parse_item_foreign_fn( + &mut self, + vis: ast::Visibility, + lo: Span, + attrs: Vec<Attribute>, + extern_sp: Span, + ) -> PResult<'a, ForeignItem> { + self.expect_keyword(kw::Fn)?; let (ident, mut generics) = self.parse_fn_header()?; let decl = self.parse_fn_decl(true)?; generics.where_clause = self.parse_where_clause()?; - let hi = self.span; - self.expect(&token::Semi)?; + let hi = self.token.span; + self.parse_semi_or_incorrect_foreign_fn_body(&ident, extern_sp)?; Ok(ast::ForeignItem { ident, attrs, @@ -7501,11 +6684,11 @@ impl<'a> Parser<'a> { /// Assumes that the `static` keyword is already parsed. fn parse_item_foreign_static(&mut self, vis: ast::Visibility, lo: Span, attrs: Vec<Attribute>) -> PResult<'a, ForeignItem> { - let mutbl = self.eat_keyword(keywords::Mut); + let mutbl = self.parse_mutability(); let ident = self.parse_ident()?; self.expect(&token::Colon)?; let ty = self.parse_ty()?; - let hi = self.span; + let hi = self.token.span; self.expect(&token::Semi)?; Ok(ForeignItem { ident, @@ -7520,18 +6703,18 @@ impl<'a> Parser<'a> { /// Parses a type from a foreign module. fn parse_item_foreign_type(&mut self, vis: ast::Visibility, lo: Span, attrs: Vec<Attribute>) -> PResult<'a, ForeignItem> { - self.expect_keyword(keywords::Type)?; + self.expect_keyword(kw::Type)?; let ident = self.parse_ident()?; - let hi = self.span; + let hi = self.token.span; self.expect(&token::Semi)?; Ok(ast::ForeignItem { - ident: ident, - attrs: attrs, + ident, + attrs, node: ForeignItemKind::Ty, id: ast::DUMMY_NODE_ID, span: lo.to(hi), - vis: vis + vis }) } @@ -7539,7 +6722,7 @@ impl<'a> Parser<'a> { let error_msg = "crate name using dashes are not valid in `extern crate` statements"; let suggestion_msg = "if the original crate name uses dashes you need to use underscores \ in the code"; - let mut ident = if self.token.is_keyword(keywords::SelfLower) { + let mut ident = if self.token.is_keyword(kw::SelfLower) { self.parse_path_segment_ident() } else { self.parse_ident() @@ -7548,7 +6731,7 @@ impl<'a> Parser<'a> { let mut replacement = vec![]; let mut fixed_crate_name = false; // Accept `extern crate name-like-this` for better diagnostics - let dash = token::Token::BinOp(token::BinOpToken::Minus); + let dash = token::BinOp(token::BinOpToken::Minus); if self.token == dash { // Do not include `-` as part of the expected tokens list while self.eat(&dash) { fixed_crate_name = true; @@ -7564,14 +6747,10 @@ impl<'a> Parser<'a> { } ident = Ident::from_str(&fixed_name).with_span_pos(fixed_name_sp); - let mut err = self.struct_span_err(fixed_name_sp, error_msg); - err.span_label(fixed_name_sp, "dash-separated idents are not valid"); - err.multipart_suggestion( - suggestion_msg, - replacement, - Applicability::MachineApplicable, - ); - err.emit(); + self.struct_span_err(fixed_name_sp, error_msg) + .span_label(fixed_name_sp, "dash-separated idents are not valid") + .multipart_suggestion(suggestion_msg, replacement, Applicability::MachineApplicable) + .emit(); } Ok(ident) } @@ -7613,12 +6792,14 @@ impl<'a> Parser<'a> { /// extern "C" {} /// extern {} /// ``` - fn parse_item_foreign_mod(&mut self, - lo: Span, - opt_abi: Option<Abi>, - visibility: Visibility, - mut attrs: Vec<Attribute>) - -> PResult<'a, P<Item>> { + fn parse_item_foreign_mod( + &mut self, + lo: Span, + opt_abi: Option<Abi>, + visibility: Visibility, + mut attrs: Vec<Attribute>, + extern_sp: Span, + ) -> PResult<'a, P<Item>> { self.expect(&token::OpenDelim(token::Brace))?; let abi = opt_abi.unwrap_or(Abi::C); @@ -7627,7 +6808,7 @@ impl<'a> Parser<'a> { let mut foreign_items = vec![]; while !self.eat(&token::CloseDelim(token::Brace)) { - foreign_items.push(self.parse_foreign_item()?); + foreign_items.push(self.parse_foreign_item(extern_sp)?); } let prev_span = self.prev_span; @@ -7635,44 +6816,33 @@ impl<'a> Parser<'a> { abi, items: foreign_items }; - let invalid = keywords::Invalid.ident(); + let invalid = Ident::invalid(); Ok(self.mk_item(lo.to(prev_span), invalid, ItemKind::ForeignMod(m), visibility, attrs)) } - /// Parses `type Foo = Bar;` - /// or - /// `existential type Foo: Bar;` - /// or - /// `return `None`` + /// Parses `type Foo = Bar;` or returns `None` /// without modifying the parser state. fn eat_type(&mut self) -> Option<PResult<'a, (Ident, AliasKind, ast::Generics)>> { // This parses the grammar: // Ident ["<"...">"] ["where" ...] ("=" | ":") Ty ";" - if self.check_keyword(keywords::Type) || - self.check_keyword(keywords::Existential) && - self.look_ahead(1, |t| t.is_keyword(keywords::Type)) { - let existential = self.eat_keyword(keywords::Existential); - assert!(self.eat_keyword(keywords::Type)); - Some(self.parse_existential_or_alias(existential)) + if self.eat_keyword(kw::Type) { + Some(self.parse_type_alias()) } else { None } } - /// Parses a type alias or existential type. - fn parse_existential_or_alias( - &mut self, - existential: bool, - ) -> PResult<'a, (Ident, AliasKind, ast::Generics)> { + /// Parses a type alias or opaque type. + fn parse_type_alias(&mut self) -> PResult<'a, (Ident, AliasKind, ast::Generics)> { let ident = self.parse_ident()?; let mut tps = self.parse_generics()?; tps.where_clause = self.parse_where_clause()?; - let alias = if existential { - self.expect(&token::Colon)?; + self.expect(&token::Eq)?; + let alias = if self.check_keyword(kw::Impl) { + self.bump(); let bounds = self.parse_generic_bounds(Some(self.prev_span))?; - AliasKind::Existential(bounds) + AliasKind::OpaqueTy(bounds) } else { - self.expect(&token::Eq)?; let ty = self.parse_ty()?; AliasKind::Weak(ty) }; @@ -7683,58 +6853,61 @@ impl<'a> Parser<'a> { /// Parses the part of an enum declaration following the `{`. fn parse_enum_def(&mut self, _generics: &ast::Generics) -> PResult<'a, EnumDef> { let mut variants = Vec::new(); - let mut all_nullary = true; - let mut any_disr = vec![]; while self.token != token::CloseDelim(token::Brace) { let variant_attrs = self.parse_outer_attributes()?; - let vlo = self.span; + let vlo = self.token.span; - let struct_def; - let mut disr_expr = None; + self.eat_bad_pub(); let ident = self.parse_ident()?; - if self.check(&token::OpenDelim(token::Brace)) { + + let struct_def = if self.check(&token::OpenDelim(token::Brace)) { // Parse a struct variant. - all_nullary = false; - struct_def = VariantData::Struct(self.parse_record_struct_body()?, - ast::DUMMY_NODE_ID); + let (fields, recovered) = self.parse_record_struct_body()?; + VariantData::Struct(fields, recovered) } else if self.check(&token::OpenDelim(token::Paren)) { - all_nullary = false; - struct_def = VariantData::Tuple(self.parse_tuple_struct_body()?, - ast::DUMMY_NODE_ID); - } else if self.eat(&token::Eq) { - disr_expr = Some(AnonConst { + VariantData::Tuple( + self.parse_tuple_struct_body()?, + ast::DUMMY_NODE_ID, + ) + } else { + VariantData::Unit(ast::DUMMY_NODE_ID) + }; + + let disr_expr = if self.eat(&token::Eq) { + Some(AnonConst { id: ast::DUMMY_NODE_ID, value: self.parse_expr()?, - }); - if let Some(sp) = disr_expr.as_ref().map(|c| c.value.span) { - any_disr.push(sp); - } - struct_def = VariantData::Unit(ast::DUMMY_NODE_ID); + }) } else { - struct_def = VariantData::Unit(ast::DUMMY_NODE_ID); - } + None + }; let vr = ast::Variant_ { ident, + id: ast::DUMMY_NODE_ID, attrs: variant_attrs, data: struct_def, disr_expr, }; variants.push(respan(vlo.to(self.prev_span), vr)); - if !self.eat(&token::Comma) { break; } - } - self.expect(&token::CloseDelim(token::Brace))?; - if !any_disr.is_empty() && !all_nullary { - let mut err =self.struct_span_err( - any_disr.clone(), - "discriminator values can only be used with a field-less enum", - ); - for sp in any_disr { - err.span_label(sp, "only valid in field-less enums"); + if !self.eat(&token::Comma) { + if self.token.is_ident() && !self.token.is_reserved_ident() { + let sp = self.sess.source_map().next_point(self.prev_span); + self.struct_span_err(sp, "missing comma") + .span_suggestion_short( + sp, + "missing comma", + ",".to_owned(), + Applicability::MaybeIncorrect, + ) + .emit(); + } else { + break; + } } - err.emit(); } + self.expect(&token::CloseDelim(token::Brace))?; Ok(ast::EnumDef { variants }) } @@ -7757,24 +6930,26 @@ impl<'a> Parser<'a> { /// Parses a string as an ABI spec on an extern type or module. Consumes /// the `extern` keyword, if one is found. fn parse_opt_abi(&mut self) -> PResult<'a, Option<Abi>> { - match self.token { - token::Literal(token::Str_(s), suf) | token::Literal(token::StrRaw(s, _), suf) => { - let sp = self.span; - self.expect_no_suffix(sp, "ABI spec", suf); + match self.token.kind { + token::Literal(token::Lit { kind: token::Str, symbol, suffix }) | + token::Literal(token::Lit { kind: token::StrRaw(..), symbol, suffix }) => { + let sp = self.token.span; + self.expect_no_suffix(sp, "an ABI spec", suffix); self.bump(); - match abi::lookup(&s.as_str()) { + match abi::lookup(&symbol.as_str()) { Some(abi) => Ok(Some(abi)), None => { let prev_span = self.prev_span; - let mut err = struct_span_err!( + struct_span_err!( self.sess.span_diagnostic, prev_span, E0703, "invalid ABI: found `{}`", - s); - err.span_label(prev_span, "invalid ABI"); - err.help(&format!("valid ABIs: {}", abi::all_names().join(", "))); - err.emit(); + symbol + ) + .span_label(prev_span, "invalid ABI") + .help(&format!("valid ABIs: {}", abi::all_names().join(", "))) + .emit(); Ok(None) } } @@ -7785,13 +6960,13 @@ impl<'a> Parser<'a> { } fn is_static_global(&mut self) -> bool { - if self.check_keyword(keywords::Static) { + if self.check_keyword(kw::Static) { // Check if this could be a closure !self.look_ahead(1, |token| { - if token.is_keyword(keywords::Move) { + if token.is_keyword(kw::Move) { return true; } - match *token { + match token.kind { token::BinOp(token::Or) | token::OrOr => true, _ => false, } @@ -7854,28 +7029,30 @@ impl<'a> Parser<'a> { Some(P(item)) }); - let lo = self.span; + let lo = self.token.span; let visibility = self.parse_visibility(false)?; - if self.eat_keyword(keywords::Use) { + if self.eat_keyword(kw::Use) { // USE ITEM let item_ = ItemKind::Use(P(self.parse_use_tree()?)); self.expect(&token::Semi)?; let span = lo.to(self.prev_span); - let item = self.mk_item(span, keywords::Invalid.ident(), item_, visibility, attrs); + let item = + self.mk_item(span, Ident::invalid(), item_, visibility, attrs); return Ok(Some(item)); } - if self.eat_keyword(keywords::Extern) { - if self.eat_keyword(keywords::Crate) { + if self.eat_keyword(kw::Extern) { + let extern_sp = self.prev_span; + if self.eat_keyword(kw::Crate) { return Ok(Some(self.parse_item_extern_crate(lo, visibility, attrs)?)); } let opt_abi = self.parse_opt_abi()?; - if self.eat_keyword(keywords::Fn) { + if self.eat_keyword(kw::Fn) { // EXTERN FUNCTION ITEM let fn_span = self.prev_span; let abi = opt_abi.unwrap_or(Abi::C); @@ -7892,7 +7069,9 @@ impl<'a> Parser<'a> { maybe_append(attrs, extra_attrs)); return Ok(Some(item)); } else if self.check(&token::OpenDelim(token::Brace)) { - return Ok(Some(self.parse_item_foreign_mod(lo, opt_abi, visibility, attrs)?)); + return Ok(Some( + self.parse_item_foreign_mod(lo, opt_abi, visibility, attrs, extern_sp)?, + )); } self.unexpected()?; @@ -7901,7 +7080,7 @@ impl<'a> Parser<'a> { if self.is_static_global() { self.bump(); // STATIC ITEM - let m = if self.eat_keyword(keywords::Mut) { + let m = if self.eat_keyword(kw::Mut) { Mutability::Mutable } else { Mutability::Immutable @@ -7915,11 +7094,11 @@ impl<'a> Parser<'a> { maybe_append(attrs, extra_attrs)); return Ok(Some(item)); } - if self.eat_keyword(keywords::Const) { + if self.eat_keyword(kw::Const) { let const_span = self.prev_span; - if self.check_keyword(keywords::Fn) - || (self.check_keyword(keywords::Unsafe) - && self.look_ahead(1, |t| t.is_keyword(keywords::Fn))) { + if self.check_keyword(kw::Fn) + || (self.check_keyword(kw::Unsafe) + && self.is_keyword_ahead(1, &[kw::Fn])) { // CONST FUNCTION ITEM let unsafety = self.parse_unsafety(); self.bump(); @@ -7938,18 +7117,17 @@ impl<'a> Parser<'a> { } // CONST ITEM - if self.eat_keyword(keywords::Mut) { + if self.eat_keyword(kw::Mut) { let prev_span = self.prev_span; - let mut err = self.diagnostic() - .struct_span_err(prev_span, "const globals cannot be mutable"); - err.span_label(prev_span, "cannot be mutable"); - err.span_suggestion( - const_span, - "you might want to declare a static instead", - "static".to_owned(), - Applicability::MaybeIncorrect, - ); - err.emit(); + self.struct_span_err(prev_span, "const globals cannot be mutable") + .span_label(prev_span, "cannot be mutable") + .span_suggestion( + const_span, + "you might want to declare a static instead", + "static".to_owned(), + Applicability::MaybeIncorrect, + ) + .emit(); } let (ident, item_, extra_attrs) = self.parse_item_const(None)?; let prev_span = self.prev_span; @@ -7961,55 +7139,45 @@ impl<'a> Parser<'a> { return Ok(Some(item)); } - // `unsafe async fn` or `async fn` - if ( - self.check_keyword(keywords::Unsafe) && - self.look_ahead(1, |t| t.is_keyword(keywords::Async)) - ) || ( - self.check_keyword(keywords::Async) && - self.look_ahead(1, |t| t.is_keyword(keywords::Fn)) - ) - { - // ASYNC FUNCTION ITEM - let unsafety = self.parse_unsafety(); - self.expect_keyword(keywords::Async)?; - let async_span = self.prev_span; - self.expect_keyword(keywords::Fn)?; - let fn_span = self.prev_span; - let (ident, item_, extra_attrs) = - self.parse_item_fn(unsafety, - respan(async_span, IsAsync::Async { - closure_id: ast::DUMMY_NODE_ID, - return_impl_trait_id: ast::DUMMY_NODE_ID, - }), - respan(fn_span, Constness::NotConst), - Abi::Rust)?; - let prev_span = self.prev_span; - let item = self.mk_item(lo.to(prev_span), - ident, - item_, - visibility, - maybe_append(attrs, extra_attrs)); - if self.span.rust_2015() { - self.diagnostic().struct_span_err_with_code( - async_span, - "`async fn` is not permitted in the 2015 edition", - DiagnosticId::Error("E0670".into()) - ).emit(); + // Parse `async unsafe? fn`. + if self.check_keyword(kw::Async) { + let async_span = self.token.span; + if self.is_keyword_ahead(1, &[kw::Fn]) + || self.is_keyword_ahead(2, &[kw::Fn]) + { + // ASYNC FUNCTION ITEM + self.bump(); // `async` + let unsafety = self.parse_unsafety(); // `unsafe`? + self.expect_keyword(kw::Fn)?; // `fn` + let fn_span = self.prev_span; + let (ident, item_, extra_attrs) = + self.parse_item_fn(unsafety, + respan(async_span, IsAsync::Async { + closure_id: ast::DUMMY_NODE_ID, + return_impl_trait_id: ast::DUMMY_NODE_ID, + }), + respan(fn_span, Constness::NotConst), + Abi::Rust)?; + let prev_span = self.prev_span; + let item = self.mk_item(lo.to(prev_span), + ident, + item_, + visibility, + maybe_append(attrs, extra_attrs)); + self.ban_async_in_2015(async_span); + return Ok(Some(item)); } - return Ok(Some(item)); } - if self.check_keyword(keywords::Unsafe) && - (self.look_ahead(1, |t| t.is_keyword(keywords::Trait)) || - self.look_ahead(1, |t| t.is_keyword(keywords::Auto))) + if self.check_keyword(kw::Unsafe) && + self.is_keyword_ahead(1, &[kw::Trait, kw::Auto]) { // UNSAFE TRAIT ITEM self.bump(); // `unsafe` - let is_auto = if self.eat_keyword(keywords::Trait) { + let is_auto = if self.eat_keyword(kw::Trait) { IsAuto::No } else { - self.expect_keyword(keywords::Auto)?; - self.expect_keyword(keywords::Trait)?; + self.expect_keyword(kw::Auto)?; + self.expect_keyword(kw::Trait)?; IsAuto::Yes }; let (ident, item_, extra_attrs) = @@ -8022,23 +7190,21 @@ impl<'a> Parser<'a> { maybe_append(attrs, extra_attrs)); return Ok(Some(item)); } - if self.check_keyword(keywords::Impl) || - self.check_keyword(keywords::Unsafe) && - self.look_ahead(1, |t| t.is_keyword(keywords::Impl)) || - self.check_keyword(keywords::Default) && - self.look_ahead(1, |t| t.is_keyword(keywords::Impl)) || - self.check_keyword(keywords::Default) && - self.look_ahead(1, |t| t.is_keyword(keywords::Unsafe)) { + if self.check_keyword(kw::Impl) || + self.check_keyword(kw::Unsafe) && + self.is_keyword_ahead(1, &[kw::Impl]) || + self.check_keyword(kw::Default) && + self.is_keyword_ahead(1, &[kw::Impl, kw::Unsafe]) { // IMPL ITEM let defaultness = self.parse_defaultness(); let unsafety = self.parse_unsafety(); - self.expect_keyword(keywords::Impl)?; + self.expect_keyword(kw::Impl)?; let (ident, item, extra_attrs) = self.parse_item_impl(unsafety, defaultness)?; let span = lo.to(self.prev_span); return Ok(Some(self.mk_item(span, ident, item, visibility, maybe_append(attrs, extra_attrs)))); } - if self.check_keyword(keywords::Fn) { + if self.check_keyword(kw::Fn) { // FUNCTION ITEM self.bump(); let fn_span = self.prev_span; @@ -8055,18 +7221,18 @@ impl<'a> Parser<'a> { maybe_append(attrs, extra_attrs)); return Ok(Some(item)); } - if self.check_keyword(keywords::Unsafe) + if self.check_keyword(kw::Unsafe) && self.look_ahead(1, |t| *t != token::OpenDelim(token::Brace)) { // UNSAFE FUNCTION ITEM self.bump(); // `unsafe` // `{` is also expected after `unsafe`, in case of error, include it in the diagnostic self.check(&token::OpenDelim(token::Brace)); - let abi = if self.eat_keyword(keywords::Extern) { + let abi = if self.eat_keyword(kw::Extern) { self.parse_opt_abi()?.unwrap_or(Abi::C) } else { Abi::Rust }; - self.expect_keyword(keywords::Fn)?; + self.expect_keyword(kw::Fn)?; let fn_span = self.prev_span; let (ident, item_, extra_attrs) = self.parse_item_fn(Unsafety::Unsafe, @@ -8081,7 +7247,7 @@ impl<'a> Parser<'a> { maybe_append(attrs, extra_attrs)); return Ok(Some(item)); } - if self.eat_keyword(keywords::Mod) { + if self.eat_keyword(kw::Mod) { // MODULE ITEM let (ident, item_, extra_attrs) = self.parse_item_mod(&attrs[..])?; @@ -8097,8 +7263,8 @@ impl<'a> Parser<'a> { let (ident, alias, generics) = type_?; // TYPE ITEM let item_ = match alias { - AliasKind::Weak(ty) => ItemKind::Ty(ty, generics), - AliasKind::Existential(bounds) => ItemKind::Existential(bounds, generics), + AliasKind::Weak(ty) => ItemKind::TyAlias(ty, generics), + AliasKind::OpaqueTy(bounds) => ItemKind::OpaqueTy(bounds, generics), }; let prev_span = self.prev_span; let item = self.mk_item(lo.to(prev_span), @@ -8108,7 +7274,7 @@ impl<'a> Parser<'a> { attrs); return Ok(Some(item)); } - if self.eat_keyword(keywords::Enum) { + if self.eat_keyword(kw::Enum) { // ENUM ITEM let (ident, item_, extra_attrs) = self.parse_item_enum()?; let prev_span = self.prev_span; @@ -8119,15 +7285,15 @@ impl<'a> Parser<'a> { maybe_append(attrs, extra_attrs)); return Ok(Some(item)); } - if self.check_keyword(keywords::Trait) - || (self.check_keyword(keywords::Auto) - && self.look_ahead(1, |t| t.is_keyword(keywords::Trait))) + if self.check_keyword(kw::Trait) + || (self.check_keyword(kw::Auto) + && self.is_keyword_ahead(1, &[kw::Trait])) { - let is_auto = if self.eat_keyword(keywords::Trait) { + let is_auto = if self.eat_keyword(kw::Trait) { IsAuto::No } else { - self.expect_keyword(keywords::Auto)?; - self.expect_keyword(keywords::Trait)?; + self.expect_keyword(kw::Auto)?; + self.expect_keyword(kw::Trait)?; IsAuto::Yes }; // TRAIT ITEM @@ -8141,7 +7307,7 @@ impl<'a> Parser<'a> { maybe_append(attrs, extra_attrs)); return Ok(Some(item)); } - if self.eat_keyword(keywords::Struct) { + if self.eat_keyword(kw::Struct) { // STRUCT ITEM let (ident, item_, extra_attrs) = self.parse_item_struct()?; let prev_span = self.prev_span; @@ -8178,9 +7344,9 @@ impl<'a> Parser<'a> { // // pub S {} // ^^^ `sp` points here - let sp = self.prev_span.between(self.span); - let full_sp = self.prev_span.to(self.span); - let ident_sp = self.span; + let sp = self.prev_span.between(self.token.span); + let full_sp = self.prev_span.to(self.token.span); + let ident_sp = self.token.span; if self.look_ahead(1, |t| *t == token::OpenDelim(token::Brace)) { // possible public struct definition where `struct` was forgotten let ident = self.parse_ident().unwrap(); @@ -8195,7 +7361,9 @@ impl<'a> Parser<'a> { } else if self.look_ahead(1, |t| *t == token::OpenDelim(token::Paren)) { let ident = self.parse_ident().unwrap(); self.bump(); // `(` - let kw_name = if let Ok(Some(_)) = self.parse_self_arg() { + let kw_name = if let Ok(Some(_)) = self.parse_self_arg_with_attrs() + .map_err(|mut e| e.cancel()) + { "method" } else { "function" @@ -8214,11 +7382,11 @@ impl<'a> Parser<'a> { } else { ("fn` or `struct", "function or struct", true) }; - self.consume_block(token::Brace); let msg = format!("missing `{}` for {} definition", kw, kw_name); let mut err = self.diagnostic().struct_span_err(sp, &msg); if !ambiguous { + self.consume_block(token::Brace); let suggestion = format!("add `{}` here to parse `{}` as a public {}", kw, ident, @@ -8227,7 +7395,7 @@ impl<'a> Parser<'a> { sp, &suggestion, format!(" {} ", kw), Applicability::MachineApplicable ); } else { - if let Ok(snippet) = self.sess.source_map().span_to_snippet(ident_sp) { + if let Ok(snippet) = self.span_to_snippet(ident_sp) { err.span_suggestion( full_sp, "if you meant to call a macro, try", @@ -8246,7 +7414,9 @@ impl<'a> Parser<'a> { self.eat_to_tokens(&[&token::Gt]); self.bump(); // `>` let (kw, kw_name, ambiguous) = if self.eat(&token::OpenDelim(token::Paren)) { - if let Ok(Some(_)) = self.parse_self_arg() { + if let Ok(Some(_)) = self.parse_self_arg_with_attrs() + .map_err(|mut e| e.cancel()) + { ("fn", "method", false) } else { ("fn", "function", false) @@ -8272,22 +7442,35 @@ impl<'a> Parser<'a> { self.parse_macro_use_or_failure(attrs, macros_allowed, attributes_allowed, lo, visibility) } + /// We are parsing `async fn`. If we are on Rust 2015, emit an error. + fn ban_async_in_2015(&self, async_span: Span) { + if async_span.rust_2015() { + self.diagnostic() + .struct_span_err_with_code( + async_span, + "`async fn` is not permitted in the 2015 edition", + DiagnosticId::Error("E0670".into()) + ) + .emit(); + } + } + /// Parses a foreign item. - crate fn parse_foreign_item(&mut self) -> PResult<'a, ForeignItem> { + crate fn parse_foreign_item(&mut self, extern_sp: Span) -> PResult<'a, ForeignItem> { maybe_whole!(self, NtForeignItem, |ni| ni); let attrs = self.parse_outer_attributes()?; - let lo = self.span; + let lo = self.token.span; let visibility = self.parse_visibility(false)?; // FOREIGN STATIC ITEM // Treat `const` as `static` for error recovery, but don't add it to expected tokens. - if self.check_keyword(keywords::Static) || self.token.is_keyword(keywords::Const) { - if self.token.is_keyword(keywords::Const) { + if self.check_keyword(kw::Static) || self.token.is_keyword(kw::Const) { + if self.token.is_keyword(kw::Const) { self.diagnostic() - .struct_span_err(self.span, "extern items cannot be `const`") + .struct_span_err(self.token.span, "extern items cannot be `const`") .span_suggestion( - self.span, + self.token.span, "try using a static value", "static".to_owned(), Applicability::MachineApplicable @@ -8297,11 +7480,11 @@ impl<'a> Parser<'a> { return Ok(self.parse_item_foreign_static(visibility, lo, attrs)?); } // FOREIGN FUNCTION ITEM - if self.check_keyword(keywords::Fn) { - return Ok(self.parse_item_foreign_fn(visibility, lo, attrs)?); + if self.check_keyword(kw::Fn) { + return Ok(self.parse_item_foreign_fn(visibility, lo, attrs, extern_sp)?); } // FOREIGN TYPE ITEM - if self.check_keyword(keywords::Type) { + if self.check_keyword(kw::Type) { return Ok(self.parse_item_foreign_type(visibility, lo, attrs)?); } @@ -8309,7 +7492,7 @@ impl<'a> Parser<'a> { Some(mac) => { Ok( ForeignItem { - ident: keywords::Invalid.ident(), + ident: Ident::invalid(), span: lo.to(self.prev_span), id: ast::DUMMY_NODE_ID, attrs, @@ -8338,40 +7521,31 @@ impl<'a> Parser<'a> { visibility: Visibility ) -> PResult<'a, Option<P<Item>>> { if macros_allowed && self.token.is_path_start() && - !(self.is_async_fn() && self.span.rust_2015()) { + !(self.is_async_fn() && self.token.span.rust_2015()) { // MACRO INVOCATION ITEM let prev_span = self.prev_span; self.complain_if_pub_macro(&visibility.node, prev_span); - let mac_lo = self.span; + let mac_lo = self.token.span; // item macro. - let pth = self.parse_path(PathStyle::Mod)?; + let path = self.parse_path(PathStyle::Mod)?; self.expect(&token::Not)?; - - // a 'special' identifier (like what `macro_rules!` uses) - // is optional. We should eventually unify invoc syntax - // and remove this. - let id = if self.token.is_ident() { - self.parse_ident()? - } else { - keywords::Invalid.ident() // no special identifier - }; - // eat a matched-delimiter token tree: let (delim, tts) = self.expect_delimited_token_tree()?; - if delim != MacDelimiter::Brace { - if !self.eat(&token::Semi) { - self.span_err(self.prev_span, - "macros that expand to items must either \ - be surrounded with braces or followed by \ - a semicolon"); - } + if delim != MacDelimiter::Brace && !self.eat(&token::Semi) { + self.report_invalid_macro_expansion_item(); } let hi = self.prev_span; - let mac = respan(mac_lo.to(hi), Mac_ { path: pth, tts, delim }); - let item = self.mk_item(lo.to(hi), id, ItemKind::Mac(mac), visibility, attrs); + let mac = respan(mac_lo.to(hi), Mac_ { + path, + tts, + delim, + prior_type_ascription: self.last_type_ascription, + }); + let item = + self.mk_item(lo.to(hi), Ident::invalid(), ItemKind::Mac(mac), visibility, attrs); return Ok(Some(item)); } @@ -8394,12 +7568,12 @@ impl<'a> Parser<'a> { at_end: &mut bool) -> PResult<'a, Option<Mac>> { if self.token.is_path_start() && - !(self.is_async_fn() && self.span.rust_2015()) { + !(self.is_async_fn() && self.token.span.rust_2015()) { let prev_span = self.prev_span; - let lo = self.span; - let pth = self.parse_path(PathStyle::Mod)?; + let lo = self.token.span; + let path = self.parse_path(PathStyle::Mod)?; - if pth.segments.len() == 1 { + if path.segments.len() == 1 { if !self.eat(&token::Not) { return Err(self.missing_assoc_item_kind_err(item_kind, prev_span)); } @@ -8419,7 +7593,12 @@ impl<'a> Parser<'a> { self.expect(&token::Semi)?; } - Ok(Some(respan(lo.to(self.prev_span), Mac_ { path: pth, tts, delim }))) + Ok(Some(respan(lo.to(self.prev_span), Mac_ { + path, + tts, + delim, + prior_type_ascription: self.last_type_ascription, + }))) } else { Ok(None) } @@ -8432,7 +7611,7 @@ impl<'a> Parser<'a> { let mut tokens = Vec::new(); let prev_collecting = match self.token_cursor.frame.last_token { LastToken::Collecting(ref mut list) => { - Some(mem::replace(list, Vec::new())) + Some(mem::take(list)) } LastToken::Was(ref mut last) => { tokens.extend(last.take()); @@ -8444,14 +7623,27 @@ impl<'a> Parser<'a> { let ret = f(self); let last_token = if self.token_cursor.stack.len() == prev { &mut self.token_cursor.frame.last_token + } else if self.token_cursor.stack.get(prev).is_none() { + // This can happen due to a bad interaction of two unrelated recovery mechanisms with + // mismatched delimiters *and* recovery lookahead on the likely typo `pub ident(` + // (#62881). + return Ok((ret?, TokenStream::new(vec![]))); } else { &mut self.token_cursor.stack[prev].last_token }; // Pull out the tokens that we've collected from the call to `f` above. let mut collected_tokens = match *last_token { - LastToken::Collecting(ref mut v) => mem::replace(v, Vec::new()), - LastToken::Was(_) => panic!("our vector went away?"), + LastToken::Collecting(ref mut v) => mem::take(v), + LastToken::Was(ref was) => { + let msg = format!("our vector went away? - found Was({:?})", was); + debug!("collect_tokens: {}", msg); + self.sess.span_diagnostic.delay_span_bug(self.token.span, &msg); + // This can happen due to a bad interaction of two unrelated recovery mechanisms + // with mismatched delimiters *and* recovery lookahead on the likely typo + // `pub ident(` (#62895, different but similar to the case above). + return Ok((ret?, TokenStream::new(vec![]))); + } }; // If we're not at EOF our current token wasn't actually consumed by @@ -8503,14 +7695,14 @@ impl<'a> Parser<'a> { /// PATH [`as` IDENT] /// ``` fn parse_use_tree(&mut self) -> PResult<'a, UseTree> { - let lo = self.span; + let lo = self.token.span; let mut prefix = ast::Path { segments: Vec::new(), span: lo.shrink_to_lo() }; let kind = if self.check(&token::OpenDelim(token::Brace)) || self.check(&token::BinOp(token::Star)) || self.is_import_coupler() { // `use *;` or `use ::*;` or `use {...};` or `use ::{...};` - let mod_sep_ctxt = self.span.ctxt(); + let mod_sep_ctxt = self.token.span.ctxt(); if self.eat(&token::ModSep) { prefix.segments.push( PathSegment::path_root(lo.shrink_to_lo().with_ctxt(mod_sep_ctxt)) @@ -8546,15 +7738,12 @@ impl<'a> Parser<'a> { /// USE_TREE_LIST = Ø | (USE_TREE `,`)* USE_TREE [`,`] /// ``` fn parse_use_tree_list(&mut self) -> PResult<'a, Vec<(UseTree, ast::NodeId)>> { - self.parse_unspanned_seq(&token::OpenDelim(token::Brace), - &token::CloseDelim(token::Brace), - SeqSep::trailing_allowed(token::Comma), |this| { - Ok((this.parse_use_tree()?, ast::DUMMY_NODE_ID)) - }) + self.parse_delim_comma_seq(token::Brace, |p| Ok((p.parse_use_tree()?, ast::DUMMY_NODE_ID))) + .map(|(r, _)| r) } fn parse_rename(&mut self) -> PResult<'a, Option<Ident>> { - if self.eat_keyword(keywords::As) { + if self.eat_keyword(kw::As) { self.parse_ident_or_underscore().map(Some) } else { Ok(None) @@ -8563,19 +7752,21 @@ impl<'a> Parser<'a> { /// Parses a source module as a crate. This is the main entry point for the parser. pub fn parse_crate_mod(&mut self) -> PResult<'a, Crate> { - let lo = self.span; + let lo = self.token.span; let krate = Ok(ast::Crate { attrs: self.parse_inner_attributes()?, module: self.parse_mod_items(&token::Eof, lo)?, - span: lo.to(self.span), + span: lo.to(self.token.span), }); krate } pub fn parse_optional_str(&mut self) -> Option<(Symbol, ast::StrStyle, Option<ast::Name>)> { - let ret = match self.token { - token::Literal(token::Str_(s), suf) => (s, ast::StrStyle::Cooked, suf), - token::Literal(token::StrRaw(s, n), suf) => (s, ast::StrStyle::Raw(n), suf), + let ret = match self.token.kind { + token::Literal(token::Lit { kind: token::Str, symbol, suffix }) => + (symbol, ast::StrStyle::Cooked, suffix), + token::Literal(token::Lit { kind: token::StrRaw(n), symbol, suffix }) => + (symbol, ast::StrStyle::Raw(n), suffix), _ => return None }; self.bump(); @@ -8586,24 +7777,43 @@ impl<'a> Parser<'a> { match self.parse_optional_str() { Some((s, style, suf)) => { let sp = self.prev_span; - self.expect_no_suffix(sp, "string literal", suf); + self.expect_no_suffix(sp, "a string literal", suf); Ok((s, style)) } _ => { let msg = "expected string literal"; let mut err = self.fatal(msg); - err.span_label(self.span, msg); + err.span_label(self.token.span, msg); Err(err) } } } + + fn report_invalid_macro_expansion_item(&self) { + self.struct_span_err( + self.prev_span, + "macros that expand to items must be delimited with braces or followed by a semicolon", + ).multipart_suggestion( + "change the delimiters to curly braces", + vec![ + (self.prev_span.with_hi(self.prev_span.lo() + BytePos(1)), String::from(" {")), + (self.prev_span.with_lo(self.prev_span.hi() - BytePos(1)), '}'.to_string()), + ], + Applicability::MaybeIncorrect, + ).span_suggestion( + self.sess.source_map.next_point(self.prev_span), + "add a semicolon", + ';'.to_string(), + Applicability::MaybeIncorrect, + ).emit(); + } } pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, handler: &errors::Handler) { for unmatched in unclosed_delims.iter() { let mut err = handler.struct_span_err(unmatched.found_span, &format!( "incorrect close delimiter: `{}`", - pprust::token_to_string(&token::Token::CloseDelim(unmatched.found_delim)), + pprust::token_kind_to_string(&token::CloseDelim(unmatched.found_delim)), )); err.span_label(unmatched.found_span, "incorrect close delimiter"); if let Some(sp) = unmatched.candidate_span { diff --git a/src/libsyntax/parse/tests.rs b/src/libsyntax/parse/tests.rs new file mode 100644 index 00000000000..e619fd17fb5 --- /dev/null +++ b/src/libsyntax/parse/tests.rs @@ -0,0 +1,339 @@ +use super::*; + +use crate::ast::{self, Name, PatKind}; +use crate::attr::first_attr_value_str_by_name; +use crate::parse::{ParseSess, PResult}; +use crate::parse::new_parser_from_source_str; +use crate::parse::token::Token; +use crate::print::pprust::item_to_string; +use crate::ptr::P; +use crate::source_map::FilePathMapping; +use crate::symbol::{kw, sym}; +use crate::tests::{matches_codepattern, string_to_stream, with_error_checking_parse}; +use crate::tokenstream::{DelimSpan, TokenTree, TokenStream}; +use crate::with_default_globals; +use syntax_pos::{Span, BytePos, Pos, NO_EXPANSION}; + +use std::path::PathBuf; + +/// Parses an item. +/// +/// Returns `Ok(Some(item))` when successful, `Ok(None)` when no item was found, and `Err` +/// when a syntax error occurred. +fn parse_item_from_source_str(name: FileName, source: String, sess: &ParseSess) + -> PResult<'_, Option<P<ast::Item>>> { + new_parser_from_source_str(sess, name, source).parse_item() +} + +// produce a syntax_pos::span +fn sp(a: u32, b: u32) -> Span { + Span::new(BytePos(a), BytePos(b), NO_EXPANSION) +} + +/// Parse a string, return an expr +fn string_to_expr(source_str : String) -> P<ast::Expr> { + let ps = ParseSess::new(FilePathMapping::empty()); + with_error_checking_parse(source_str, &ps, |p| { + p.parse_expr() + }) +} + +/// Parse a string, return an item +fn string_to_item(source_str : String) -> Option<P<ast::Item>> { + let ps = ParseSess::new(FilePathMapping::empty()); + with_error_checking_parse(source_str, &ps, |p| { + p.parse_item() + }) +} + +#[should_panic] +#[test] fn bad_path_expr_1() { + with_default_globals(|| { + string_to_expr("::abc::def::return".to_string()); + }) +} + +// check the token-tree-ization of macros +#[test] +fn string_to_tts_macro () { + with_default_globals(|| { + let tts: Vec<_> = + string_to_stream("macro_rules! zip (($a)=>($a))".to_string()).trees().collect(); + let tts: &[TokenTree] = &tts[..]; + + match tts { + [ + TokenTree::Token(Token { kind: token::Ident(name_macro_rules, false), .. }), + TokenTree::Token(Token { kind: token::Not, .. }), + TokenTree::Token(Token { kind: token::Ident(name_zip, false), .. }), + TokenTree::Delimited(_, macro_delim, macro_tts) + ] + if name_macro_rules == &sym::macro_rules && name_zip.as_str() == "zip" => { + let tts = ¯o_tts.trees().collect::<Vec<_>>(); + match &tts[..] { + [ + TokenTree::Delimited(_, first_delim, first_tts), + TokenTree::Token(Token { kind: token::FatArrow, .. }), + TokenTree::Delimited(_, second_delim, second_tts), + ] + if macro_delim == &token::Paren => { + let tts = &first_tts.trees().collect::<Vec<_>>(); + match &tts[..] { + [ + TokenTree::Token(Token { kind: token::Dollar, .. }), + TokenTree::Token(Token { kind: token::Ident(name, false), .. }), + ] + if first_delim == &token::Paren && name.as_str() == "a" => {}, + _ => panic!("value 3: {:?} {:?}", first_delim, first_tts), + } + let tts = &second_tts.trees().collect::<Vec<_>>(); + match &tts[..] { + [ + TokenTree::Token(Token { kind: token::Dollar, .. }), + TokenTree::Token(Token { kind: token::Ident(name, false), .. }), + ] + if second_delim == &token::Paren && name.as_str() == "a" => {}, + _ => panic!("value 4: {:?} {:?}", second_delim, second_tts), + } + }, + _ => panic!("value 2: {:?} {:?}", macro_delim, macro_tts), + } + }, + _ => panic!("value: {:?}",tts), + } + }) +} + +#[test] +fn string_to_tts_1() { + with_default_globals(|| { + let tts = string_to_stream("fn a (b : i32) { b; }".to_string()); + + let expected = TokenStream::new(vec![ + TokenTree::token(token::Ident(kw::Fn, false), sp(0, 2)).into(), + TokenTree::token(token::Ident(Name::intern("a"), false), sp(3, 4)).into(), + TokenTree::Delimited( + DelimSpan::from_pair(sp(5, 6), sp(13, 14)), + token::DelimToken::Paren, + TokenStream::new(vec![ + TokenTree::token(token::Ident(Name::intern("b"), false), sp(6, 7)).into(), + TokenTree::token(token::Colon, sp(8, 9)).into(), + TokenTree::token(token::Ident(sym::i32, false), sp(10, 13)).into(), + ]).into(), + ).into(), + TokenTree::Delimited( + DelimSpan::from_pair(sp(15, 16), sp(20, 21)), + token::DelimToken::Brace, + TokenStream::new(vec![ + TokenTree::token(token::Ident(Name::intern("b"), false), sp(17, 18)).into(), + TokenTree::token(token::Semi, sp(18, 19)).into(), + ]).into(), + ).into() + ]); + + assert_eq!(tts, expected); + }) +} + +#[test] fn parse_use() { + with_default_globals(|| { + let use_s = "use foo::bar::baz;"; + let vitem = string_to_item(use_s.to_string()).unwrap(); + let vitem_s = item_to_string(&vitem); + assert_eq!(&vitem_s[..], use_s); + + let use_s = "use foo::bar as baz;"; + let vitem = string_to_item(use_s.to_string()).unwrap(); + let vitem_s = item_to_string(&vitem); + assert_eq!(&vitem_s[..], use_s); + }) +} + +#[test] fn parse_extern_crate() { + with_default_globals(|| { + let ex_s = "extern crate foo;"; + let vitem = string_to_item(ex_s.to_string()).unwrap(); + let vitem_s = item_to_string(&vitem); + assert_eq!(&vitem_s[..], ex_s); + + let ex_s = "extern crate foo as bar;"; + let vitem = string_to_item(ex_s.to_string()).unwrap(); + let vitem_s = item_to_string(&vitem); + assert_eq!(&vitem_s[..], ex_s); + }) +} + +fn get_spans_of_pat_idents(src: &str) -> Vec<Span> { + let item = string_to_item(src.to_string()).unwrap(); + + struct PatIdentVisitor { + spans: Vec<Span> + } + impl<'a> crate::visit::Visitor<'a> for PatIdentVisitor { + fn visit_pat(&mut self, p: &'a ast::Pat) { + match p.node { + PatKind::Ident(_ , ref spannedident, _) => { + self.spans.push(spannedident.span.clone()); + } + _ => { + crate::visit::walk_pat(self, p); + } + } + } + } + let mut v = PatIdentVisitor { spans: Vec::new() }; + crate::visit::walk_item(&mut v, &item); + return v.spans; +} + +#[test] fn span_of_self_arg_pat_idents_are_correct() { + with_default_globals(|| { + + let srcs = ["impl z { fn a (&self, &myarg: i32) {} }", + "impl z { fn a (&mut self, &myarg: i32) {} }", + "impl z { fn a (&'a self, &myarg: i32) {} }", + "impl z { fn a (self, &myarg: i32) {} }", + "impl z { fn a (self: Foo, &myarg: i32) {} }", + ]; + + for &src in &srcs { + let spans = get_spans_of_pat_idents(src); + let (lo, hi) = (spans[0].lo(), spans[0].hi()); + assert!("self" == &src[lo.to_usize()..hi.to_usize()], + "\"{}\" != \"self\". src=\"{}\"", + &src[lo.to_usize()..hi.to_usize()], src) + } + }) +} + +#[test] fn parse_exprs () { + with_default_globals(|| { + // just make sure that they parse.... + string_to_expr("3 + 4".to_string()); + string_to_expr("a::z.froob(b,&(987+3))".to_string()); + }) +} + +#[test] fn attrs_fix_bug () { + with_default_globals(|| { + string_to_item("pub fn mk_file_writer(path: &Path, flags: &[FileFlag]) + -> Result<Box<Writer>, String> { +#[cfg(windows)] +fn wb() -> c_int { + (O_WRONLY | libc::consts::os::extra::O_BINARY) as c_int +} + +#[cfg(unix)] +fn wb() -> c_int { O_WRONLY as c_int } + +let mut fflags: c_int = wb(); +}".to_string()); + }) +} + +#[test] fn crlf_doc_comments() { + with_default_globals(|| { + let sess = ParseSess::new(FilePathMapping::empty()); + + let name_1 = FileName::Custom("crlf_source_1".to_string()); + let source = "/// doc comment\r\nfn foo() {}".to_string(); + let item = parse_item_from_source_str(name_1, source, &sess) + .unwrap().unwrap(); + let doc = first_attr_value_str_by_name(&item.attrs, sym::doc).unwrap(); + assert_eq!(doc.as_str(), "/// doc comment"); + + let name_2 = FileName::Custom("crlf_source_2".to_string()); + let source = "/// doc comment\r\n/// line 2\r\nfn foo() {}".to_string(); + let item = parse_item_from_source_str(name_2, source, &sess) + .unwrap().unwrap(); + let docs = item.attrs.iter().filter(|a| a.path == sym::doc) + .map(|a| a.value_str().unwrap().to_string()).collect::<Vec<_>>(); + let b: &[_] = &["/// doc comment".to_string(), "/// line 2".to_string()]; + assert_eq!(&docs[..], b); + + let name_3 = FileName::Custom("clrf_source_3".to_string()); + let source = "/** doc comment\r\n * with CRLF */\r\nfn foo() {}".to_string(); + let item = parse_item_from_source_str(name_3, source, &sess).unwrap().unwrap(); + let doc = first_attr_value_str_by_name(&item.attrs, sym::doc).unwrap(); + assert_eq!(doc.as_str(), "/** doc comment\n * with CRLF */"); + }); +} + +#[test] +fn ttdelim_span() { + fn parse_expr_from_source_str( + name: FileName, source: String, sess: &ParseSess + ) -> PResult<'_, P<ast::Expr>> { + new_parser_from_source_str(sess, name, source).parse_expr() + } + + with_default_globals(|| { + let sess = ParseSess::new(FilePathMapping::empty()); + let expr = parse_expr_from_source_str(PathBuf::from("foo").into(), + "foo!( fn main() { body } )".to_string(), &sess).unwrap(); + + let tts: Vec<_> = match expr.node { + ast::ExprKind::Mac(ref mac) => mac.node.stream().trees().collect(), + _ => panic!("not a macro"), + }; + + let span = tts.iter().rev().next().unwrap().span(); + + match sess.source_map().span_to_snippet(span) { + Ok(s) => assert_eq!(&s[..], "{ body }"), + Err(_) => panic!("could not get snippet"), + } + }); +} + +// This tests that when parsing a string (rather than a file) we don't try +// and read in a file for a module declaration and just parse a stub. +// See `recurse_into_file_modules` in the parser. +#[test] +fn out_of_line_mod() { + with_default_globals(|| { + let sess = ParseSess::new(FilePathMapping::empty()); + let item = parse_item_from_source_str( + PathBuf::from("foo").into(), + "mod foo { struct S; mod this_does_not_exist; }".to_owned(), + &sess, + ).unwrap().unwrap(); + + if let ast::ItemKind::Mod(ref m) = item.node { + assert!(m.items.len() == 2); + } else { + panic!(); + } + }); +} + +#[test] +fn eqmodws() { + assert_eq!(matches_codepattern("",""),true); + assert_eq!(matches_codepattern("","a"),false); + assert_eq!(matches_codepattern("a",""),false); + assert_eq!(matches_codepattern("a","a"),true); + assert_eq!(matches_codepattern("a b","a \n\t\r b"),true); + assert_eq!(matches_codepattern("a b ","a \n\t\r b"),true); + assert_eq!(matches_codepattern("a b","a \n\t\r b "),false); + assert_eq!(matches_codepattern("a b","a b"),true); + assert_eq!(matches_codepattern("ab","a b"),false); + assert_eq!(matches_codepattern("a b","ab"),true); + assert_eq!(matches_codepattern(" a b","ab"),true); +} + +#[test] +fn pattern_whitespace() { + assert_eq!(matches_codepattern("","\x0C"), false); + assert_eq!(matches_codepattern("a b ","a \u{0085}\n\t\r b"),true); + assert_eq!(matches_codepattern("a b","a \u{0085}\n\t\r b "),false); +} + +#[test] +fn non_pattern_whitespace() { + // These have the property 'White_Space' but not 'Pattern_White_Space' + assert_eq!(matches_codepattern("a b","a\u{2002}b"), false); + assert_eq!(matches_codepattern("a b","a\u{2002}b"), false); + assert_eq!(matches_codepattern("\u{205F}a b","ab"), false); + assert_eq!(matches_codepattern("a \u{3000}b","ab"), false); +} diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 2fa4f5263fb..be800b4de66 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -1,25 +1,24 @@ pub use BinOpToken::*; pub use Nonterminal::*; pub use DelimToken::*; -pub use Lit::*; -pub use Token::*; +pub use LitKind::*; +pub use TokenKind::*; use crate::ast::{self}; -use crate::parse::ParseSess; +use crate::parse::{parse_stream_from_source_str, ParseSess}; use crate::print::pprust; use crate::ptr::P; -use crate::symbol::keywords; -use crate::syntax::parse::parse_stream_from_source_str; +use crate::symbol::kw; use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree}; -use syntax_pos::symbol::{self, Symbol}; -use syntax_pos::{self, Span, FileName}; +use syntax_pos::symbol::Symbol; +use syntax_pos::{self, Span, FileName, DUMMY_SP}; use log::info; use std::fmt; use std::mem; #[cfg(target_arch = "x86_64")] -use rustc_data_structures::static_assert; +use rustc_data_structures::static_assert_size; use rustc_data_structures::sync::Lrc; #[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)] @@ -59,84 +58,143 @@ impl DelimToken { } } -#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)] -pub enum Lit { - Byte(ast::Name), - Char(ast::Name), - Err(ast::Name), - Integer(ast::Name), - Float(ast::Name), - Str_(ast::Name), - StrRaw(ast::Name, u16), /* raw str delimited by n hash symbols */ - ByteStr(ast::Name), - ByteStrRaw(ast::Name, u16), /* raw byte str delimited by n hash symbols */ +#[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)] +pub enum LitKind { + Bool, // AST only, must never appear in a `Token` + Byte, + Char, + Integer, + Float, + Str, + StrRaw(u16), // raw string delimited by `n` hash symbols + ByteStr, + ByteStrRaw(u16), // raw byte string delimited by `n` hash symbols + Err, } -impl Lit { - crate fn literal_name(&self) -> &'static str { - match *self { - Byte(_) => "byte literal", - Char(_) => "char literal", - Err(_) => "invalid literal", - Integer(_) => "integer literal", - Float(_) => "float literal", - Str_(_) | StrRaw(..) => "string literal", - ByteStr(_) | ByteStrRaw(..) => "byte string literal" +/// A literal token. +#[derive(Clone, Copy, PartialEq, RustcEncodable, RustcDecodable, Debug)] +pub struct Lit { + pub kind: LitKind, + pub symbol: Symbol, + pub suffix: Option<Symbol>, +} + +impl fmt::Display for Lit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Lit { kind, symbol, suffix } = *self; + match kind { + Byte => write!(f, "b'{}'", symbol)?, + Char => write!(f, "'{}'", symbol)?, + Str => write!(f, "\"{}\"", symbol)?, + StrRaw(n) => write!(f, "r{delim}\"{string}\"{delim}", + delim="#".repeat(n as usize), + string=symbol)?, + ByteStr => write!(f, "b\"{}\"", symbol)?, + ByteStrRaw(n) => write!(f, "br{delim}\"{string}\"{delim}", + delim="#".repeat(n as usize), + string=symbol)?, + Integer | + Float | + Bool | + Err => write!(f, "{}", symbol)?, } + + if let Some(suffix) = suffix { + write!(f, "{}", suffix)?; + } + + Ok(()) } +} - // See comments in `Nonterminal::to_tokenstream` for why we care about - // *probably* equal here rather than actual equality - fn probably_equal_for_proc_macro(&self, other: &Lit) -> bool { - mem::discriminant(self) == mem::discriminant(other) +impl LitKind { + /// An English article for the literal token kind. + crate fn article(self) -> &'static str { + match self { + Integer | Err => "an", + _ => "a", + } + } + + crate fn descr(self) -> &'static str { + match self { + Bool => panic!("literal token contains `Lit::Bool`"), + Byte => "byte", + Char => "char", + Integer => "integer", + Float => "float", + Str | StrRaw(..) => "string", + ByteStr | ByteStrRaw(..) => "byte string", + Err => "error", + } + } + + crate fn may_have_suffix(self) -> bool { + match self { + Integer | Float | Err => true, + _ => false, + } } } -pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool { - let ident_token: Token = Ident(ident, is_raw); +impl Lit { + pub fn new(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> Lit { + Lit { kind, symbol, suffix } + } +} + +pub(crate) fn ident_can_begin_expr(name: ast::Name, span: Span, is_raw: bool) -> bool { + let ident_token = Token::new(Ident(name, is_raw), span); !ident_token.is_reserved_ident() || ident_token.is_path_segment_keyword() || [ - keywords::Async.name(), - keywords::Do.name(), - keywords::Box.name(), - keywords::Break.name(), - keywords::Continue.name(), - keywords::False.name(), - keywords::For.name(), - keywords::If.name(), - keywords::Loop.name(), - keywords::Match.name(), - keywords::Move.name(), - keywords::Return.name(), - keywords::True.name(), - keywords::Unsafe.name(), - keywords::While.name(), - keywords::Yield.name(), - keywords::Static.name(), - ].contains(&ident.name) + kw::Async, + + // FIXME: remove when `await!(..)` syntax is removed + // https://github.com/rust-lang/rust/issues/60610 + kw::Await, + + kw::Do, + kw::Box, + kw::Break, + kw::Continue, + kw::False, + kw::For, + kw::If, + kw::Let, + kw::Loop, + kw::Match, + kw::Move, + kw::Return, + kw::True, + kw::Unsafe, + kw::While, + kw::Yield, + kw::Static, + ].contains(&name) } -fn ident_can_begin_type(ident: ast::Ident, is_raw: bool) -> bool { - let ident_token: Token = Ident(ident, is_raw); +fn ident_can_begin_type(name: ast::Name, span: Span, is_raw: bool) -> bool { + let ident_token = Token::new(Ident(name, is_raw), span); !ident_token.is_reserved_ident() || ident_token.is_path_segment_keyword() || [ - keywords::Underscore.name(), - keywords::For.name(), - keywords::Impl.name(), - keywords::Fn.name(), - keywords::Unsafe.name(), - keywords::Extern.name(), - keywords::Typeof.name(), - keywords::Dyn.name(), - ].contains(&ident.name) + kw::Underscore, + kw::For, + kw::Impl, + kw::Fn, + kw::Unsafe, + kw::Extern, + kw::Typeof, + kw::Dyn, + ].contains(&name) } -#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Debug)] -pub enum Token { +#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug)] +pub enum TokenKind { /* Expression-operator symbols. */ Eq, Lt, @@ -176,11 +234,11 @@ pub enum Token { CloseDelim(DelimToken), /* Literals */ - Literal(Lit, Option<ast::Name>), + Literal(Lit), /* Name components */ - Ident(ast::Ident, /* is_raw */ bool), - Lifetime(ast::Ident), + Ident(ast::Name, /* is_raw */ bool), + Lifetime(ast::Name), Interpolated(Lrc<Nonterminal>), @@ -197,22 +255,69 @@ pub enum Token { /// A comment. Comment, Shebang(ast::Name), + /// A completely invalid token which should be skipped. + Unknown(ast::Name), Eof, } -// `Token` is used a lot. Make sure it doesn't unintentionally get bigger. +// `TokenKind` is used a lot. Make sure it doesn't unintentionally get bigger. #[cfg(target_arch = "x86_64")] -static_assert!(MEM_SIZE_OF_STATEMENT: mem::size_of::<Token>() == 16); +static_assert_size!(TokenKind, 16); + +#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug)] +pub struct Token { + pub kind: TokenKind, + pub span: Span, +} + +impl TokenKind { + pub fn lit(kind: LitKind, symbol: Symbol, suffix: Option<Symbol>) -> TokenKind { + Literal(Lit::new(kind, symbol, suffix)) + } + + /// Returns tokens that are likely to be typed accidentally instead of the current token. + /// Enables better error recovery when the wrong token is found. + crate fn similar_tokens(&self) -> Option<Vec<TokenKind>> { + match *self { + Comma => Some(vec![Dot, Lt, Semi]), + Semi => Some(vec![Colon, Comma]), + _ => None + } + } +} impl Token { + crate fn new(kind: TokenKind, span: Span) -> Self { + Token { kind, span } + } + + /// Some token that will be thrown away later. + crate fn dummy() -> Self { + Token::new(TokenKind::Whitespace, DUMMY_SP) + } + /// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary. - pub fn from_ast_ident(ident: ast::Ident) -> Token { - Ident(ident, ident.is_raw_guess()) + crate fn from_ast_ident(ident: ast::Ident) -> Self { + Token::new(Ident(ident.name, ident.is_raw_guess()), ident.span) + } + + /// Return this token by value and leave a dummy token in its place. + crate fn take(&mut self) -> Self { + mem::replace(self, Token::dummy()) + } + + crate fn is_op(&self) -> bool { + match self.kind { + OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) | + Ident(..) | Lifetime(..) | Interpolated(..) | + Whitespace | Comment | Shebang(..) | Eof => false, + _ => true, + } } crate fn is_like_plus(&self) -> bool { - match *self { + match self.kind { BinOp(Plus) | BinOpEq(Plus) => true, _ => false, } @@ -220,9 +325,9 @@ impl Token { /// Returns `true` if the token can appear at the start of an expression. crate fn can_begin_expr(&self) -> bool { - match *self { - Ident(ident, is_raw) => - ident_can_begin_expr(ident, is_raw), // value name or keyword + match self.kind { + Ident(name, is_raw) => + ident_can_begin_expr(name, self.span, is_raw), // value name or keyword OpenDelim(..) | // tuple, array or block Literal(..) | // literal Not | // operator not @@ -252,9 +357,9 @@ impl Token { /// Returns `true` if the token can appear at the start of a type. crate fn can_begin_type(&self) -> bool { - match *self { - Ident(ident, is_raw) => - ident_can_begin_type(ident, is_raw), // type name or keyword + match self.kind { + Ident(name, is_raw) => + ident_can_begin_type(name, self.span, is_raw), // type name or keyword OpenDelim(Paren) | // tuple OpenDelim(Bracket) | // array Not | // never @@ -274,8 +379,8 @@ impl Token { } /// Returns `true` if the token can appear at the start of a const param. - pub fn can_begin_const_arg(&self) -> bool { - match self { + crate fn can_begin_const_arg(&self) -> bool { + match self.kind { OpenDelim(Brace) => true, Interpolated(ref nt) => match **nt { NtExpr(..) => true, @@ -289,26 +394,33 @@ impl Token { /// Returns `true` if the token can appear at the start of a generic bound. crate fn can_begin_bound(&self) -> bool { - self.is_path_start() || self.is_lifetime() || self.is_keyword(keywords::For) || + self.is_path_start() || self.is_lifetime() || self.is_keyword(kw::For) || self == &Question || self == &OpenDelim(Paren) } /// Returns `true` if the token is any literal crate fn is_lit(&self) -> bool { - match *self { + match self.kind { Literal(..) => true, _ => false, } } + crate fn expect_lit(&self) -> Lit { + match self.kind { + Literal(lit) => lit, + _=> panic!("`expect_lit` called on non-literal"), + } + } + /// Returns `true` if the token is any literal, a minus (which can prefix a literal, /// for example a '-42', or one of the boolean idents). crate fn can_begin_literal_or_bool(&self) -> bool { - match *self { + match self.kind { Literal(..) => true, BinOp(Minus) => true, - Ident(ident, false) if ident.name == keywords::True.name() => true, - Ident(ident, false) if ident.name == keywords::False.name() => true, + Ident(name, false) if name == kw::True => true, + Ident(name, false) if name == kw::False => true, Interpolated(ref nt) => match **nt { NtLiteral(..) => true, _ => false, @@ -319,8 +431,8 @@ impl Token { /// Returns an identifier if this token is an identifier. pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> { - match *self { - Ident(ident, is_raw) => Some((ident, is_raw)), + match self.kind { + Ident(name, is_raw) => Some((ast::Ident::new(name, self.span), is_raw)), Interpolated(ref nt) => match **nt { NtIdent(ident, is_raw) => Some((ident, is_raw)), _ => None, @@ -328,10 +440,11 @@ impl Token { _ => None, } } + /// Returns a lifetime identifier if this token is a lifetime. pub fn lifetime(&self) -> Option<ast::Ident> { - match *self { - Lifetime(ident) => Some(ident), + match self.kind { + Lifetime(name) => Some(ast::Ident::new(name, self.span)), Interpolated(ref nt) => match **nt { NtLifetime(ident) => Some(ident), _ => None, @@ -339,6 +452,7 @@ impl Token { _ => None, } } + /// Returns `true` if the token is an identifier. pub fn is_ident(&self) -> bool { self.ident().is_some() @@ -350,16 +464,13 @@ impl Token { /// Returns `true` if the token is a identifier whose name is the given /// string slice. - crate fn is_ident_named(&self, name: &str) -> bool { - match self.ident() { - Some((ident, _)) => ident.as_str() == name, - None => false - } + crate fn is_ident_named(&self, name: Symbol) -> bool { + self.ident().map_or(false, |(ident, _)| ident.name == name) } /// Returns `true` if the token is an interpolated path. fn is_path(&self) -> bool { - if let Interpolated(ref nt) = *self { + if let Interpolated(ref nt) = self.kind { if let NtPath(..) = **nt { return true; } @@ -367,10 +478,23 @@ impl Token { false } + /// Would `maybe_whole_expr` in `parser.rs` return `Ok(..)`? + /// That is, is this a pre-parsed expression dropped into the token stream + /// (which happens while parsing the result of macro expansion)? + crate fn is_whole_expr(&self) -> bool { + if let Interpolated(ref nt) = self.kind { + if let NtExpr(_) | NtLiteral(_) | NtPath(_) | NtIdent(..) | NtBlock(_) = **nt { + return true; + } + } + + false + } + /// Returns `true` if the token is either the `mut` or `const` keyword. crate fn is_mutability(&self) -> bool { - self.is_keyword(keywords::Mut) || - self.is_keyword(keywords::Const) + self.is_keyword(kw::Mut) || + self.is_keyword(kw::Const) } crate fn is_qpath_start(&self) -> bool { @@ -383,11 +507,11 @@ impl Token { } /// Returns `true` if the token is a given keyword, `kw`. - pub fn is_keyword(&self, kw: keywords::Keyword) -> bool { - self.ident().map(|(ident, is_raw)| ident.name == kw.name() && !is_raw).unwrap_or(false) + pub fn is_keyword(&self, kw: Symbol) -> bool { + self.ident().map(|(id, is_raw)| id.name == kw && !is_raw).unwrap_or(false) } - pub fn is_path_segment_keyword(&self) -> bool { + crate fn is_path_segment_keyword(&self) -> bool { match self.ident() { Some((id, false)) => id.is_path_segment_keyword(), _ => false, @@ -396,7 +520,7 @@ impl Token { // Returns true for reserved identifiers used internally for elided lifetimes, // unnamed method parameters, crate root module, error recovery etc. - pub fn is_special_ident(&self) -> bool { + crate fn is_special_ident(&self) -> bool { match self.ident() { Some((id, false)) => id.is_special(), _ => false, @@ -428,58 +552,52 @@ impl Token { } crate fn glue(self, joint: Token) -> Option<Token> { - Some(match self { - Eq => match joint { + let kind = match self.kind { + Eq => match joint.kind { Eq => EqEq, Gt => FatArrow, _ => return None, }, - Lt => match joint { + Lt => match joint.kind { Eq => Le, Lt => BinOp(Shl), Le => BinOpEq(Shl), BinOp(Minus) => LArrow, _ => return None, }, - Gt => match joint { + Gt => match joint.kind { Eq => Ge, Gt => BinOp(Shr), Ge => BinOpEq(Shr), _ => return None, }, - Not => match joint { + Not => match joint.kind { Eq => Ne, _ => return None, }, - BinOp(op) => match joint { + BinOp(op) => match joint.kind { Eq => BinOpEq(op), BinOp(And) if op == And => AndAnd, BinOp(Or) if op == Or => OrOr, Gt if op == Minus => RArrow, _ => return None, }, - Dot => match joint { + Dot => match joint.kind { Dot => DotDot, DotDot => DotDotDot, _ => return None, }, - DotDot => match joint { + DotDot => match joint.kind { Dot => DotDotDot, Eq => DotDotEq, _ => return None, }, - Colon => match joint { + Colon => match joint.kind { Colon => ModSep, _ => return None, }, - SingleQuote => match joint { - Ident(ident, false) => { - let name = Symbol::intern(&format!("'{}", ident)); - Lifetime(symbol::Ident { - name, - span: ident.span, - }) - } + SingleQuote => match joint.kind { + Ident(name, false) => Lifetime(Symbol::intern(&format!("'{}", name))), _ => return None, }, @@ -487,27 +605,19 @@ impl Token { DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar | Question | OpenDelim(..) | CloseDelim(..) | Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) | - Whitespace | Comment | Shebang(..) | Eof => return None, - }) - } + Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => return None, + }; - /// Returns tokens that are likely to be typed accidentally instead of the current token. - /// Enables better error recovery when the wrong token is found. - crate fn similar_tokens(&self) -> Option<Vec<Token>> { - match *self { - Comma => Some(vec![Dot, Lt, Semi]), - Semi => Some(vec![Colon, Comma]), - _ => None - } + Some(Token::new(kind, self.span.to(joint.span))) } // See comments in `Nonterminal::to_tokenstream` for why we care about // *probably* equal here rather than actual equality crate fn probably_equal_for_proc_macro(&self, other: &Token) -> bool { - if mem::discriminant(self) != mem::discriminant(other) { + if mem::discriminant(&self.kind) != mem::discriminant(&other.kind) { return false } - match (self, other) { + match (&self.kind, &other.kind) { (&Eq, &Eq) | (&Lt, &Lt) | (&Le, &Le) | @@ -547,14 +657,12 @@ impl Token { (&DocComment(a), &DocComment(b)) | (&Shebang(a), &Shebang(b)) => a == b, - (&Lifetime(a), &Lifetime(b)) => a.name == b.name, - (&Ident(a, b), &Ident(c, d)) => b == d && (a.name == c.name || - a.name == keywords::DollarCrate.name() || - c.name == keywords::DollarCrate.name()), + (&Literal(a), &Literal(b)) => a == b, - (&Literal(ref a, b), &Literal(ref c, d)) => { - b == d && a.probably_equal_for_proc_macro(c) - } + (&Lifetime(a), &Lifetime(b)) => a == b, + (&Ident(a, b), &Ident(c, d)) => b == d && (a == c || + a == kw::DollarCrate || + c == kw::DollarCrate), (&Interpolated(_), &Interpolated(_)) => false, @@ -563,6 +671,12 @@ impl Token { } } +impl PartialEq<TokenKind> for Token { + fn eq(&self, rhs: &TokenKind) -> bool { + self.kind == *rhs + } +} + #[derive(Clone, RustcEncodable, RustcDecodable)] /// For interpolation during macro expansion. pub enum Nonterminal { @@ -580,14 +694,12 @@ pub enum Nonterminal { NtPath(ast::Path), NtVis(ast::Visibility), NtTT(TokenTree), - // These are not exposed to macros, but are used by quasiquote. - NtArm(ast::Arm), - NtImplItem(ast::ImplItem), + // Used only for passing items to proc macro attributes (they are not + // strictly necessary for that, `Annotatable` can be converted into + // tokens directly, but doing that naively regresses pretty-printing). NtTraitItem(ast::TraitItem), + NtImplItem(ast::ImplItem), NtForeignItem(ast::ForeignItem), - NtGenerics(ast::Generics), - NtWhereClause(ast::WhereClause), - NtArg(ast::Arg), } impl PartialEq for Nonterminal { @@ -620,13 +732,9 @@ impl fmt::Debug for Nonterminal { NtMeta(..) => f.pad("NtMeta(..)"), NtPath(..) => f.pad("NtPath(..)"), NtTT(..) => f.pad("NtTT(..)"), - NtArm(..) => f.pad("NtArm(..)"), NtImplItem(..) => f.pad("NtImplItem(..)"), NtTraitItem(..) => f.pad("NtTraitItem(..)"), NtForeignItem(..) => f.pad("NtForeignItem(..)"), - NtGenerics(..) => f.pad("NtGenerics(..)"), - NtWhereClause(..) => f.pad("NtWhereClause(..)"), - NtArg(..) => f.pad("NtArg(..)"), NtVis(..) => f.pad("NtVis(..)"), NtLifetime(..) => f.pad("NtLifetime(..)"), } @@ -658,12 +766,10 @@ impl Nonterminal { prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span) } Nonterminal::NtIdent(ident, is_raw) => { - let token = Token::Ident(ident, is_raw); - Some(TokenTree::Token(ident.span, token).into()) + Some(TokenTree::token(Ident(ident.name, is_raw), ident.span).into()) } Nonterminal::NtLifetime(ident) => { - let token = Token::Lifetime(ident); - Some(TokenTree::Token(ident.span, token).into()) + Some(TokenTree::token(Lifetime(ident.name), ident.span).into()) } Nonterminal::NtTT(ref tt) => { Some(tt.clone().into()) @@ -710,15 +816,6 @@ impl Nonterminal { } } -crate fn is_op(tok: &Token) -> bool { - match *tok { - OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) | - Ident(..) | Lifetime(..) | Interpolated(..) | - Whitespace | Comment | Shebang(..) | Eof => false, - _ => true, - } -} - fn prepend_attrs(sess: &ParseSess, attrs: &[ast::Attribute], tokens: Option<&tokenstream::TokenStream>, @@ -734,7 +831,7 @@ fn prepend_attrs(sess: &ParseSess, assert_eq!(attr.style, ast::AttrStyle::Outer, "inner attributes should prevent cached tokens from existing"); - let source = pprust::attr_to_string(attr); + let source = pprust::attribute_to_string(attr); let macro_filename = FileName::macro_expansion_source_code(&source); if attr.is_sugared_doc { let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span)); @@ -748,8 +845,8 @@ fn prepend_attrs(sess: &ParseSess, // For simple paths, push the identifier directly if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() { let ident = attr.path.segments[0].ident; - let token = Ident(ident, ident.as_str().starts_with("r#")); - brackets.push(tokenstream::TokenTree::Token(ident.span, token)); + let token = Ident(ident.name, ident.as_str().starts_with("r#")); + brackets.push(tokenstream::TokenTree::token(token, ident.span)); // ... and for more complicated paths, fall back to a reparse hack that // should eventually be removed. @@ -763,7 +860,7 @@ fn prepend_attrs(sess: &ParseSess, // The span we list here for `#` and for `[ ... ]` are both wrong in // that it encompasses more than each token, but it hopefully is "good // enough" for now at least. - builder.push(tokenstream::TokenTree::Token(attr.span, Pound)); + builder.push(tokenstream::TokenTree::token(Pound, attr.span)); let delim_span = DelimSpan::from_single(attr.span); builder.push(tokenstream::TokenTree::Delimited( delim_span, DelimToken::Bracket, brackets.build().into())); diff --git a/src/libsyntax/parse/unescape_error_reporting.rs b/src/libsyntax/parse/unescape_error_reporting.rs new file mode 100644 index 00000000000..7eee07e61a9 --- /dev/null +++ b/src/libsyntax/parse/unescape_error_reporting.rs @@ -0,0 +1,209 @@ +//! Utilities for rendering escape sequence errors as diagnostics. + +use std::ops::Range; +use std::iter::once; + +use rustc_lexer::unescape::{EscapeError, Mode}; +use syntax_pos::{Span, BytePos}; + +use crate::errors::{Handler, Applicability}; + +pub(crate) fn emit_unescape_error( + handler: &Handler, + // interior part of the literal, without quotes + lit: &str, + // full span of the literal, including quotes + span_with_quotes: Span, + mode: Mode, + // range of the error inside `lit` + range: Range<usize>, + error: EscapeError, +) { + log::debug!("emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", + lit, span_with_quotes, mode, range, error); + let span = { + let Range { start, end } = range; + let (start, end) = (start as u32, end as u32); + let lo = span_with_quotes.lo() + BytePos(start + 1); + let hi = lo + BytePos(end - start); + span_with_quotes + .with_lo(lo) + .with_hi(hi) + }; + let last_char = || { + let c = lit[range.clone()].chars().rev().next().unwrap(); + let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32)); + (c, span) + }; + match error { + EscapeError::LoneSurrogateUnicodeEscape => { + handler.struct_span_err(span, "invalid unicode character escape") + .help("unicode escape must not be a surrogate") + .emit(); + } + EscapeError::OutOfRangeUnicodeEscape => { + handler.struct_span_err(span, "invalid unicode character escape") + .help("unicode escape must be at most 10FFFF") + .emit(); + } + EscapeError::MoreThanOneChar => { + handler + .struct_span_err( + span_with_quotes, + "character literal may only contain one codepoint", + ) + .span_suggestion( + span_with_quotes, + "if you meant to write a `str` literal, use double quotes", + format!("\"{}\"", lit), + Applicability::MachineApplicable, + ).emit() + } + EscapeError::EscapeOnlyChar => { + let (c, _span) = last_char(); + + let mut msg = if mode.is_bytes() { + "byte constant must be escaped: " + } else { + "character constant must be escaped: " + }.to_string(); + push_escaped_char(&mut msg, c); + + handler.span_err(span, msg.as_str()) + } + EscapeError::BareCarriageReturn => { + let msg = if mode.in_double_quotes() { + "bare CR not allowed in string, use \\r instead" + } else { + "character constant must be escaped: \\r" + }; + handler.span_err(span, msg); + } + EscapeError::BareCarriageReturnInRawString => { + assert!(mode.in_double_quotes()); + let msg = "bare CR not allowed in raw string"; + handler.span_err(span, msg); + } + EscapeError::InvalidEscape => { + let (c, span) = last_char(); + + let label = if mode.is_bytes() { + "unknown byte escape" + } else { + "unknown character escape" + }; + let mut msg = label.to_string(); + msg.push_str(": "); + push_escaped_char(&mut msg, c); + + let mut diag = handler.struct_span_err(span, msg.as_str()); + diag.span_label(span, label); + if c == '{' || c == '}' && !mode.is_bytes() { + diag.help("if used in a formatting string, \ + curly braces are escaped with `{{` and `}}`"); + } else if c == '\r' { + diag.help("this is an isolated carriage return; \ + consider checking your editor and version control settings"); + } + diag.emit(); + } + EscapeError::TooShortHexEscape => { + handler.span_err(span, "numeric character escape is too short") + } + EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => { + let (c, span) = last_char(); + + let mut msg = if error == EscapeError::InvalidCharInHexEscape { + "invalid character in numeric character escape: " + } else { + "invalid character in unicode escape: " + }.to_string(); + push_escaped_char(&mut msg, c); + + handler.span_err(span, msg.as_str()) + } + EscapeError::NonAsciiCharInByte => { + assert!(mode.is_bytes()); + let (_c, span) = last_char(); + handler.span_err(span, "byte constant must be ASCII. \ + Use a \\xHH escape for a non-ASCII byte") + } + EscapeError::NonAsciiCharInByteString => { + assert!(mode.is_bytes()); + let (_c, span) = last_char(); + handler.span_err(span, "raw byte string must be ASCII") + } + EscapeError::OutOfRangeHexEscape => { + handler.span_err(span, "this form of character escape may only be used \ + with characters in the range [\\x00-\\x7f]") + } + EscapeError::LeadingUnderscoreUnicodeEscape => { + let (_c, span) = last_char(); + handler.span_err(span, "invalid start of unicode escape") + } + EscapeError::OverlongUnicodeEscape => { + handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)") + } + EscapeError::UnclosedUnicodeEscape => { + handler.span_err(span, "unterminated unicode escape (needed a `}`)") + } + EscapeError::NoBraceInUnicodeEscape => { + let msg = "incorrect unicode escape sequence"; + let mut diag = handler.struct_span_err(span, msg); + + let mut suggestion = "\\u{".to_owned(); + let mut suggestion_len = 0; + let (c, char_span) = last_char(); + let chars = once(c).chain(lit[range.end..].chars()); + for c in chars.take(6).take_while(|c| c.is_digit(16)) { + suggestion.push(c); + suggestion_len += c.len_utf8(); + } + + if suggestion_len > 0 { + suggestion.push('}'); + let lo = char_span.lo(); + let hi = lo + BytePos(suggestion_len as u32); + diag.span_suggestion( + span.with_lo(lo).with_hi(hi), + "format of unicode escape sequences uses braces", + suggestion, + Applicability::MaybeIncorrect, + ); + } else { + diag.span_label(span, msg); + diag.help( + "format of unicode escape sequences is `\\u{...}`", + ); + } + + diag.emit(); + } + EscapeError::UnicodeEscapeInByte => { + handler.span_err(span, "unicode escape sequences cannot be used \ + as a byte or in a byte string") + } + EscapeError::EmptyUnicodeEscape => { + handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)") + } + EscapeError::ZeroChars => { + handler.span_err(span, "empty character literal") + } + EscapeError::LoneSlash => { + handler.span_err(span, "invalid trailing slash in literal") + } + } +} + +/// Pushes a character to a message string for error reporting +pub(crate) fn push_escaped_char(msg: &mut String, c: char) { + match c { + '\u{20}'..='\u{7e}' => { + // Don't escape \, ' or " for user-facing messages + msg.push(c); + } + _ => { + msg.extend(c.escape_default()); + } + } +} |
