diff options
| author | Sean Cross <sean@xobs.io> | 2021-04-25 00:35:25 +0800 |
|---|---|---|
| committer | Sean Cross <sean@xobs.io> | 2021-04-25 00:35:25 +0800 |
| commit | f9d390d14ad891c4ce9fe108b86d6756ea5154ee (patch) | |
| tree | 5a12452fef7481362a5fcd06beb491ca4bcf7a69 /compiler/rustc_parse/src/parser | |
| parent | 8f73fe91f5db7de6e42ad7824a00b9729d2925b2 (diff) | |
| parent | e11a9fa52a3f372dadd6db3d3f2ed7dc2621dcc4 (diff) | |
| download | rust-f9d390d14ad891c4ce9fe108b86d6756ea5154ee.tar.gz rust-f9d390d14ad891c4ce9fe108b86d6756ea5154ee.zip | |
Merge remote-tracking branch 'upstream/master' into impl-16351-nightly
Signed-off-by: Sean Cross <sean@xobs.io>
Diffstat (limited to 'compiler/rustc_parse/src/parser')
| -rw-r--r-- | compiler/rustc_parse/src/parser/attr.rs | 22 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/attr_wrapper.rs | 494 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/diagnostics.rs | 136 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/expr.rs | 166 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/item.rs | 125 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 175 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/nonterminal.rs | 31 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/pat.rs | 241 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/stmt.rs | 79 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/ty.rs | 4 |
10 files changed, 1068 insertions, 405 deletions
diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index 95d4a48b845..ee6ff4dba39 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -1,10 +1,11 @@ -use super::{AttrWrapper, Parser, PathStyle}; +use super::{AttrWrapper, Capturing, Parser, PathStyle}; use rustc_ast as ast; use rustc_ast::attr; use rustc_ast::token::{self, Nonterminal}; use rustc_ast_pretty::pprust; use rustc_errors::{error_code, PResult}; use rustc_span::{sym, Span}; +use std::convert::TryInto; use tracing::debug; @@ -29,6 +30,7 @@ impl<'a> Parser<'a> { pub(super) fn parse_outer_attributes(&mut self) -> PResult<'a, AttrWrapper> { let mut attrs: Vec<ast::Attribute> = Vec::new(); let mut just_parsed_doc_comment = false; + let start_pos = self.token_cursor.num_next_calls; loop { debug!("parse_outer_attributes: self.token={:?}", self.token); let attr = if self.check(&token::Pound) { @@ -74,7 +76,7 @@ impl<'a> Parser<'a> { break; } } - Ok(AttrWrapper::new(attrs)) + Ok(AttrWrapper::new(attrs.into(), start_pos)) } /// Matches `attribute = # ! [ meta_item ]`. @@ -177,6 +179,7 @@ impl<'a> Parser<'a> { crate fn parse_inner_attributes(&mut self) -> PResult<'a, Vec<ast::Attribute>> { let mut attrs: Vec<ast::Attribute> = vec![]; loop { + let start_pos: u32 = self.token_cursor.num_next_calls.try_into().unwrap(); // Only try to parse if it is an inner attribute (has `!`). let attr = if self.check(&token::Pound) && self.look_ahead(1, |t| t == &token::Not) { Some(self.parse_attribute(InnerAttrPolicy::Permitted)?) @@ -191,6 +194,18 @@ impl<'a> Parser<'a> { None }; if let Some(attr) = attr { + let end_pos: u32 = self.token_cursor.num_next_calls.try_into().unwrap(); + // If we are currently capturing tokens, mark the location of this inner attribute. + // If capturing ends up creating a `LazyTokenStream`, we will include + // this replace range with it, removing the inner attribute from the final + // `AttrAnnotatedTokenStream`. Inner attributes are stored in the parsed AST note. + // During macro expansion, they are selectively inserted back into the + // token stream (the first inner attribute is remoevd each time we invoke the + // corresponding macro). + let range = start_pos..end_pos; + if let Capturing::Yes = self.capture_state.capturing { + self.capture_state.inner_attr_ranges.insert(attr.id, (range, vec![])); + } attrs.push(attr); } else { break; @@ -311,6 +326,9 @@ pub fn maybe_needs_tokens(attrs: &[ast::Attribute]) -> bool { // One of the attributes may either itself be a macro, // or expand to macro attributes (`cfg_attr`). attrs.iter().any(|attr| { + if attr.is_doc_comment() { + return false; + } attr.ident().map_or(true, |ident| { ident.name == sym::cfg_attr || !rustc_feature::is_builtin_attr_name(ident.name) }) diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index f45d8d6c7a0..35759a396e8 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -1,12 +1,14 @@ -use super::attr; -use super::{ForceCollect, Parser, TokenCursor, TrailingToken}; -use rustc_ast::token::{self, Token, TokenKind}; -use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing}; -use rustc_ast::tokenstream::{DelimSpan, LazyTokenStream, Spacing}; -use rustc_ast::AstLike; +use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor, TrailingToken}; +use rustc_ast::token::{self, DelimToken, Token, TokenKind}; +use rustc_ast::tokenstream::{AttrAnnotatedTokenStream, AttributesData, CreateTokenStream}; +use rustc_ast::tokenstream::{AttrAnnotatedTokenTree, DelimSpan, LazyTokenStream, Spacing}; use rustc_ast::{self as ast}; +use rustc_ast::{AstLike, AttrVec, Attribute}; use rustc_errors::PResult; -use rustc_span::{Span, DUMMY_SP}; +use rustc_span::{sym, Span, DUMMY_SP}; + +use std::convert::TryInto; +use std::ops::Range; /// A wrapper type to ensure that the parser handles outer attributes correctly. /// When we parse outer attributes, we need to ensure that we capture tokens @@ -23,23 +25,158 @@ use rustc_span::{Span, DUMMY_SP}; /// cannot directly access the `attrs` field #[derive(Debug, Clone)] pub struct AttrWrapper { - attrs: Vec<ast::Attribute>, + attrs: AttrVec, + // The start of the outer attributes in the token cursor. + // This allows us to create a `ReplaceRange` for the entire attribute + // target, including outer attributes. + start_pos: usize, } +// This struct is passed around very frequently, +// so make sure it doesn't accidentally get larger +#[cfg(target_arch = "x86_64")] +rustc_data_structures::static_assert_size!(AttrWrapper, 16); + impl AttrWrapper { - pub fn empty() -> AttrWrapper { - AttrWrapper { attrs: vec![] } + pub(super) fn new(attrs: AttrVec, start_pos: usize) -> AttrWrapper { + AttrWrapper { attrs, start_pos } } - pub fn new(attrs: Vec<ast::Attribute>) -> AttrWrapper { - AttrWrapper { attrs } + pub fn empty() -> AttrWrapper { + AttrWrapper { attrs: AttrVec::new(), start_pos: usize::MAX } } // FIXME: Delay span bug here? - pub(crate) fn take_for_recovery(self) -> Vec<ast::Attribute> { + pub(crate) fn take_for_recovery(self) -> AttrVec { self.attrs } + + // FIXME: require passing an NT to prevent misuse of this method + pub(crate) fn prepend_to_nt_inner(self, attrs: &mut Vec<Attribute>) { + let mut self_attrs: Vec<_> = self.attrs.into(); + std::mem::swap(attrs, &mut self_attrs); + attrs.extend(self_attrs); + } + pub fn is_empty(&self) -> bool { self.attrs.is_empty() } + + pub fn maybe_needs_tokens(&self) -> bool { + crate::parser::attr::maybe_needs_tokens(&self.attrs) + } +} + +/// Returns `true` if `attrs` contains a `cfg` or `cfg_attr` attribute +fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool { + // NOTE: Builtin attributes like `cfg` and `cfg_attr` cannot be renamed via imports. + // Therefore, the absence of a literal `cfg` or `cfg_attr` guarantees that + // we don't need to do any eager expansion. + attrs.iter().any(|attr| { + attr.ident().map_or(false, |ident| ident.name == sym::cfg || ident.name == sym::cfg_attr) + }) +} + +// Produces a `TokenStream` on-demand. Using `cursor_snapshot` +// and `num_calls`, we can reconstruct the `TokenStream` seen +// by the callback. This allows us to avoid producing a `TokenStream` +// if it is never needed - for example, a captured `macro_rules!` +// argument that is never passed to a proc macro. +// In practice token stream creation happens rarely compared to +// calls to `collect_tokens` (see some statistics in #78736), +// so we are doing as little up-front work as possible. +// +// This also makes `Parser` very cheap to clone, since +// there is no intermediate collection buffer to clone. +#[derive(Clone)] +struct LazyTokenStreamImpl { + start_token: (Token, Spacing), + cursor_snapshot: TokenCursor, + num_calls: usize, + break_last_token: bool, + replace_ranges: Box<[ReplaceRange]>, +} + +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +rustc_data_structures::static_assert_size!(LazyTokenStreamImpl, 144); + +impl CreateTokenStream for LazyTokenStreamImpl { + fn create_token_stream(&self) -> AttrAnnotatedTokenStream { + // The token produced by the final call to `next` or `next_desugared` + // was not actually consumed by the callback. The combination + // of chaining the initial token and using `take` produces the desired + // result - we produce an empty `TokenStream` if no calls were made, + // and omit the final token otherwise. + let mut cursor_snapshot = self.cursor_snapshot.clone(); + let tokens = + std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1)) + .chain((0..self.num_calls).map(|_| { + let token = if cursor_snapshot.desugar_doc_comments { + cursor_snapshot.next_desugared() + } else { + cursor_snapshot.next() + }; + (FlatToken::Token(token.0), token.1) + })) + .take(self.num_calls); + + if !self.replace_ranges.is_empty() { + let mut tokens: Vec<_> = tokens.collect(); + let mut replace_ranges = self.replace_ranges.clone(); + replace_ranges.sort_by_key(|(range, _)| range.start); + + #[cfg(debug_assertions)] + { + for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() { + assert!( + range.end <= next_range.start || range.end >= next_range.end, + "Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", + range, + tokens, + next_range, + next_tokens, + ); + } + } + + // Process the replace ranges, starting from the highest start + // position and working our way back. If have tokens like: + // + // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }` + // + // Then we will generate replace ranges for both + // the `#[cfg(FALSE)] field: bool` and the entire + // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }` + // + // By starting processing from the replace range with the greatest + // start position, we ensure that any replace range which encloses + // another replace range will capture the *replaced* tokens for the inner + // range, not the original tokens. + for (range, new_tokens) in replace_ranges.iter().rev() { + assert!(!range.is_empty(), "Cannot replace an empty range: {:?}", range); + // Replace ranges are only allowed to decrease the number of tokens. + assert!( + range.len() >= new_tokens.len(), + "Range {:?} has greater len than {:?}", + range, + new_tokens + ); + + // Replace any removed tokens with `FlatToken::Empty`. + // This keeps the total length of `tokens` constant throughout the + // replacement process, allowing us to use all of the `ReplaceRanges` entries + // without adjusting indices. + let filler = std::iter::repeat((FlatToken::Empty, Spacing::Alone)) + .take(range.len() - new_tokens.len()); + + tokens.splice( + (range.start as usize)..(range.end as usize), + new_tokens.clone().into_iter().chain(filler), + ); + } + make_token_stream(tokens.into_iter(), self.break_last_token) + } else { + make_token_stream(tokens, self.break_last_token) + } + } } impl<'a> Parser<'a> { @@ -65,77 +202,195 @@ impl<'a> Parser<'a> { force_collect: ForceCollect, f: impl FnOnce(&mut Self, Vec<ast::Attribute>) -> PResult<'a, (R, TrailingToken)>, ) -> PResult<'a, R> { - if matches!(force_collect, ForceCollect::No) && !attr::maybe_needs_tokens(&attrs.attrs) { - return Ok(f(self, attrs.attrs)?.0); + // We only bail out when nothing could possibly observe the collected tokens: + // 1. We cannot be force collecting tokens (since force-collecting requires tokens + // by definition + if matches!(force_collect, ForceCollect::No) + // None of our outer attributes can require tokens (e.g. a proc-macro) + && !attrs.maybe_needs_tokens() + // If our target supports custom inner attributes, then we cannot bail + // out early, since we may need to capture tokens for a custom inner attribute + // invocation. + && !R::SUPPORTS_CUSTOM_INNER_ATTRS + // Never bail out early in `capture_cfg` mode, since there might be `#[cfg]` + // or `#[cfg_attr]` attributes. + && !self.capture_cfg + { + return Ok(f(self, attrs.attrs.into())?.0); } + let start_token = (self.token.clone(), self.token_spacing); let cursor_snapshot = self.token_cursor.clone(); - let (mut ret, trailing_token) = f(self, attrs.attrs)?; - - // Produces a `TokenStream` on-demand. Using `cursor_snapshot` - // and `num_calls`, we can reconstruct the `TokenStream` seen - // by the callback. This allows us to avoid producing a `TokenStream` - // if it is never needed - for example, a captured `macro_rules!` - // argument that is never passed to a proc macro. - // In practice token stream creation happens rarely compared to - // calls to `collect_tokens` (see some statistics in #78736), - // so we are doing as little up-front work as possible. - // - // This also makes `Parser` very cheap to clone, since - // there is no intermediate collection buffer to clone. - #[derive(Clone)] - struct LazyTokenStreamImpl { - start_token: (Token, Spacing), - cursor_snapshot: TokenCursor, - num_calls: usize, - desugar_doc_comments: bool, - append_unglued_token: Option<TreeAndSpacing>, + let has_outer_attrs = !attrs.attrs.is_empty(); + let prev_capturing = std::mem::replace(&mut self.capture_state.capturing, Capturing::Yes); + let replace_ranges_start = self.capture_state.replace_ranges.len(); + + let ret = f(self, attrs.attrs.into()); + + self.capture_state.capturing = prev_capturing; + + let (mut ret, trailing) = ret?; + + // When we're not in `capture-cfg` mode, then bail out early if: + // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`) + // so there's nothing for us to do. + // 2. Our target already has tokens set (e.g. we've parsed something + // like `#[my_attr] $item`. The actual parsing code takes care of prepending + // any attributes to the nonterminal, so we don't need to modify the + // already captured tokens. + // Note that this check is independent of `force_collect`- if we already + // have tokens, or can't even store them, then there's never a need to + // force collection of new tokens. + if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) { + return Ok(ret); + } + + // This is very similar to the bail out check at the start of this function. + // Now that we've parsed an AST node, we have more information available. + if matches!(force_collect, ForceCollect::No) + // We now have inner attributes available, so this check is more precise + // than `attrs.maybe_needs_tokens()` at the start of the function. + // As a result, we don't need to check `R::SUPPORTS_CUSTOM_INNER_ATTRS` + && !crate::parser::attr::maybe_needs_tokens(ret.attrs()) + // Subtle: We call `has_cfg_or_cfg_attr` with the attrs from `ret`. + // This ensures that we consider inner attributes (e.g. `#![cfg]`), + // which require us to have tokens available + // We also call `has_cfg_or_cfg_attr` at the beginning of this function, + // but we only bail out if there's no possibility of inner attributes + // (!R::SUPPORTS_CUSTOM_INNER_ATTRS) + // We only catpure about `#[cfg]` or `#[cfg_attr]` in `capture_cfg` + // mode - during normal parsing, we don't need any special capturing + // for those attributes, since they're builtin. + && !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs())) + { + return Ok(ret); } - impl CreateTokenStream for LazyTokenStreamImpl { - fn create_token_stream(&self) -> TokenStream { - // The token produced by the final call to `next` or `next_desugared` - // was not actually consumed by the callback. The combination - // of chaining the initial token and using `take` produces the desired - // result - we produce an empty `TokenStream` if no calls were made, - // and omit the final token otherwise. - let mut cursor_snapshot = self.cursor_snapshot.clone(); - let tokens = std::iter::once(self.start_token.clone()) - .chain((0..self.num_calls).map(|_| { - if self.desugar_doc_comments { - cursor_snapshot.next_desugared() - } else { - cursor_snapshot.next() - } - })) - .take(self.num_calls); - - make_token_stream(tokens, self.append_unglued_token.clone()) + + let mut inner_attr_replace_ranges = Vec::new(); + // Take the captured ranges for any inner attributes that we parsed. + for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) { + if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) { + inner_attr_replace_ranges.push(attr_range); + } else { + self.sess + .span_diagnostic + .delay_span_bug(inner_attr.span, "Missing token range for attribute"); } } - let mut num_calls = self.token_cursor.num_next_calls - cursor_snapshot.num_next_calls; - match trailing_token { + let replace_ranges_end = self.capture_state.replace_ranges.len(); + + let cursor_snapshot_next_calls = cursor_snapshot.num_next_calls; + let mut end_pos = self.token_cursor.num_next_calls; + + // Capture a trailing token if requested by the callback 'f' + match trailing { TrailingToken::None => {} TrailingToken::Semi => { assert_eq!(self.token.kind, token::Semi); - num_calls += 1; + end_pos += 1; } TrailingToken::MaybeComma => { if self.token.kind == token::Comma { - num_calls += 1; + end_pos += 1; } } } - let lazy_impl = LazyTokenStreamImpl { + // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens), + // then extend the range of captured tokens to include it, since the parser + // was not actually bumped past it. When the `LazyTokenStream` gets converted + // into a `AttrAnnotatedTokenStream`, we will create the proper token. + if self.token_cursor.break_last_token { + assert_eq!( + trailing, + TrailingToken::None, + "Cannot set `break_last_token` and have trailing token" + ); + end_pos += 1; + } + + let num_calls = end_pos - cursor_snapshot_next_calls; + + // If we have no attributes, then we will never need to + // use any replace ranges. + let replace_ranges: Box<[ReplaceRange]> = if ret.attrs().is_empty() && !self.capture_cfg { + Box::new([]) + } else { + // Grab any replace ranges that occur *inside* the current AST node. + // We will perform the actual replacement when we convert the `LazyTokenStream` + // to a `AttrAnnotatedTokenStream` + let start_calls: u32 = cursor_snapshot_next_calls.try_into().unwrap(); + self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end] + .iter() + .cloned() + .chain(inner_attr_replace_ranges.clone().into_iter()) + .map(|(range, tokens)| { + ((range.start - start_calls)..(range.end - start_calls), tokens) + }) + .collect() + }; + + let tokens = LazyTokenStream::new(LazyTokenStreamImpl { start_token, num_calls, cursor_snapshot, - desugar_doc_comments: self.desugar_doc_comments, - append_unglued_token: self.token_cursor.append_unglued_token.clone(), - }; - ret.finalize_tokens(LazyTokenStream::new(lazy_impl)); + break_last_token: self.token_cursor.break_last_token, + replace_ranges, + }); + + // If we support tokens at all + if let Some(target_tokens) = ret.tokens_mut() { + if let Some(target_tokens) = target_tokens { + assert!( + !self.capture_cfg, + "Encountered existing tokens with capture_cfg set: {:?}", + target_tokens + ); + } else { + // Store se our newly captured tokens into the AST node + *target_tokens = Some(tokens.clone()); + }; + } + + let final_attrs = ret.attrs(); + + // If `capture_cfg` is set and we're inside a recursive call to + // `collect_tokens_trailing_token`, then we need to register a replace range + // if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion + // on the captured token stream. + if self.capture_cfg + && matches!(self.capture_state.capturing, Capturing::Yes) + && has_cfg_or_cfg_attr(&final_attrs) + { + let attr_data = AttributesData { attrs: final_attrs.to_vec().into(), tokens }; + + // Replace the entire AST node that we just parsed, including attributes, + // with a `FlatToken::AttrTarget`. If this AST node is inside an item + // that has `#[derive]`, then this will allow us to cfg-expand this + // AST node. + let start_pos = + if has_outer_attrs { attrs.start_pos } else { cursor_snapshot_next_calls }; + let new_tokens = vec![(FlatToken::AttrTarget(attr_data), Spacing::Alone)]; + + assert!( + !self.token_cursor.break_last_token, + "Should not have unglued last token with cfg attr" + ); + let range: Range<u32> = (start_pos.try_into().unwrap())..(end_pos.try_into().unwrap()); + self.capture_state.replace_ranges.push((range, new_tokens)); + self.capture_state.replace_ranges.extend(inner_attr_replace_ranges); + } + + // Only clear our `replace_ranges` when we're finished capturing entirely. + if matches!(self.capture_state.capturing, Capturing::No) { + self.capture_state.replace_ranges.clear(); + // We don't clear `inner_attr_ranges`, as doing so repeatedly + // had a measureable performance impact. Most inner attributes that + // we insert will get removed - when we drop the parser, we'll free + // up the memory used by any attributes that we didn't remove from the map. + } Ok(ret) } } @@ -143,43 +398,112 @@ impl<'a> Parser<'a> { /// Converts a flattened iterator of tokens (including open and close delimiter tokens) /// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair /// of open and close delims. +// FIXME(#67062): Currently, we don't parse `None`-delimited groups correctly, +// which can cause us to end up with mismatched `None` delimiters in our +// captured tokens. This function contains several hacks to work around this - +// essentially, we throw away mismatched `None` delimiters when we encounter them. +// Once we properly parse `None` delimiters, they can be captured just like any +// other tokens, and these hacks can be removed. fn make_token_stream( - tokens: impl Iterator<Item = (Token, Spacing)>, - append_unglued_token: Option<TreeAndSpacing>, -) -> TokenStream { + mut iter: impl Iterator<Item = (FlatToken, Spacing)>, + break_last_token: bool, +) -> AttrAnnotatedTokenStream { #[derive(Debug)] struct FrameData { open: Span, - inner: Vec<(TokenTree, Spacing)>, + open_delim: DelimToken, + inner: Vec<(AttrAnnotatedTokenTree, Spacing)>, } - let mut stack = vec![FrameData { open: DUMMY_SP, inner: vec![] }]; - for (token, spacing) in tokens { + let mut stack = + vec![FrameData { open: DUMMY_SP, open_delim: DelimToken::NoDelim, inner: vec![] }]; + let mut token_and_spacing = iter.next(); + while let Some((token, spacing)) = token_and_spacing { match token { - Token { kind: TokenKind::OpenDelim(_), span } => { - stack.push(FrameData { open: span, inner: vec![] }); + FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => { + stack.push(FrameData { open: span, open_delim: delim, inner: vec![] }); } - Token { kind: TokenKind::CloseDelim(delim), span } => { - let frame_data = stack.pop().expect("Token stack was empty!"); + FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => { + // HACK: If we enconter a mismatched `None` delimiter at the top + // level, just ignore it. + if matches!(delim, DelimToken::NoDelim) + && (stack.len() == 1 + || !matches!(stack.last_mut().unwrap().open_delim, DelimToken::NoDelim)) + { + token_and_spacing = iter.next(); + continue; + } + let frame_data = stack + .pop() + .unwrap_or_else(|| panic!("Token stack was empty for token: {:?}", token)); + + // HACK: If our current frame has a mismatched opening `None` delimiter, + // merge our current frame with the one above it. That is, transform + // `[ { < first second } third ]` into `[ { first second } third ]` + if !matches!(delim, DelimToken::NoDelim) + && matches!(frame_data.open_delim, DelimToken::NoDelim) + { + stack.last_mut().unwrap().inner.extend(frame_data.inner); + // Process our closing delimiter again, this time at the previous + // frame in the stack + token_and_spacing = Some((token, spacing)); + continue; + } + + assert_eq!( + frame_data.open_delim, delim, + "Mismatched open/close delims: open={:?} close={:?}", + frame_data.open, span + ); let dspan = DelimSpan::from_pair(frame_data.open, span); - let stream = TokenStream::new(frame_data.inner); - let delimited = TokenTree::Delimited(dspan, delim, stream); + let stream = AttrAnnotatedTokenStream::new(frame_data.inner); + let delimited = AttrAnnotatedTokenTree::Delimited(dspan, delim, stream); stack .last_mut() - .unwrap_or_else(|| panic!("Bottom token frame is missing for tokens!")) + .unwrap_or_else(|| { + panic!("Bottom token frame is missing for token: {:?}", token) + }) .inner .push((delimited, Spacing::Alone)); } - token => { - stack - .last_mut() - .expect("Bottom token frame is missing!") - .inner - .push((TokenTree::Token(token), spacing)); - } + FlatToken::Token(token) => stack + .last_mut() + .expect("Bottom token frame is missing!") + .inner + .push((AttrAnnotatedTokenTree::Token(token), spacing)), + FlatToken::AttrTarget(data) => stack + .last_mut() + .expect("Bottom token frame is missing!") + .inner + .push((AttrAnnotatedTokenTree::Attributes(data), spacing)), + FlatToken::Empty => {} } + token_and_spacing = iter.next(); + } + // HACK: If we don't have a closing `None` delimiter for our last + // frame, merge the frame with the top-level frame. That is, + // turn `< first second` into `first second` + if stack.len() == 2 && stack[1].open_delim == DelimToken::NoDelim { + let temp_buf = stack.pop().unwrap(); + stack.last_mut().unwrap().inner.extend(temp_buf.inner); } let mut final_buf = stack.pop().expect("Missing final buf!"); - final_buf.inner.extend(append_unglued_token); + if break_last_token { + let (last_token, spacing) = final_buf.inner.pop().unwrap(); + if let AttrAnnotatedTokenTree::Token(last_token) = last_token { + let unglued_first = last_token.kind.break_two_token_op().unwrap().0; + + // A 'unglued' token is always two ASCII characters + let mut first_span = last_token.span.shrink_to_lo(); + first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1)); + + final_buf.inner.push(( + AttrAnnotatedTokenTree::Token(Token::new(unglued_first, first_span)), + spacing, + )); + } else { + panic!("Unexpected last token {:?}", last_token) + } + } assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack); - TokenStream::new(final_buf.inner) + AttrAnnotatedTokenStream::new(final_buf.inner) } diff --git a/compiler/rustc_parse/src/parser/diagnostics.rs b/compiler/rustc_parse/src/parser/diagnostics.rs index f4ab3260d1a..70a5ac6f15e 100644 --- a/compiler/rustc_parse/src/parser/diagnostics.rs +++ b/compiler/rustc_parse/src/parser/diagnostics.rs @@ -640,7 +640,7 @@ impl<'a> Parser<'a> { } } Err(mut err) => { - // We could't parse generic parameters, unlikely to be a turbofish. Rely on + // We couldn't parse generic parameters, unlikely to be a turbofish. Rely on // generic parse error instead. err.cancel(); *self = snapshot; @@ -666,21 +666,23 @@ impl<'a> Parser<'a> { ); match x { Ok((_, _, false)) => { - self.bump(); // `>` - match self.parse_expr() { - Ok(_) => { - e.span_suggestion_verbose( - binop.span.shrink_to_lo(), - TURBOFISH_SUGGESTION_STR, - "::".to_string(), - Applicability::MaybeIncorrect, - ); - e.emit(); - *expr = self.mk_expr_err(expr.span.to(self.prev_token.span)); - return Ok(()); - } - Err(mut err) => { - err.cancel(); + if self.eat(&token::Gt) { + match self.parse_expr() { + Ok(_) => { + e.span_suggestion_verbose( + binop.span.shrink_to_lo(), + TURBOFISH_SUGGESTION_STR, + "::".to_string(), + Applicability::MaybeIncorrect, + ); + e.emit(); + *expr = + self.mk_expr_err(expr.span.to(self.prev_token.span)); + return Ok(()); + } + Err(mut err) => { + err.cancel(); + } } } } @@ -1242,7 +1244,7 @@ impl<'a> Parser<'a> { let is_question = self.eat(&token::Question); // Handle `await? <expr>`. let expr = if self.token == token::OpenDelim(token::Brace) { // Handle `await { <expr> }`. - // This needs to be handled separatedly from the next arm to avoid + // This needs to be handled separately from the next arm to avoid // interpreting `await { <expr> }?` as `<expr>?.await`. self.parse_block_expr(None, self.token.span, BlockCheckMode::Default, AttrVec::new()) } else { @@ -1613,42 +1615,82 @@ impl<'a> Parser<'a> { Applicability::HasPlaceholders, ); return Some(ident); - } else if let PatKind::Ident(_, ident, _) = pat.kind { - if require_name - && (self.token == token::Comma - || self.token == token::Lt - || self.token == token::CloseDelim(token::Paren)) - { - // `fn foo(a, b) {}`, `fn foo(a<x>, b<y>) {}` or `fn foo(usize, usize) {}` - if first_param { - err.span_suggestion( - pat.span, - "if this is a `self` type, give it a parameter name", - format!("self: {}", ident), - Applicability::MaybeIncorrect, - ); + } else if require_name + && (self.token == token::Comma + || self.token == token::Lt + || self.token == token::CloseDelim(token::Paren)) + { + let rfc_note = "anonymous parameters are removed in the 2018 edition (see RFC 1685)"; + + let (ident, self_sugg, param_sugg, type_sugg) = match pat.kind { + PatKind::Ident(_, ident, _) => ( + ident, + format!("self: {}", ident), + format!("{}: TypeName", ident), + format!("_: {}", ident), + ), + // Also catches `fn foo(&a)`. + PatKind::Ref(ref pat, mutab) + if matches!(pat.clone().into_inner().kind, PatKind::Ident(..)) => + { + match pat.clone().into_inner().kind { + PatKind::Ident(_, ident, _) => { + let mutab = mutab.prefix_str(); + ( + ident, + format!("self: &{}{}", mutab, ident), + format!("{}: &{}TypeName", ident, mutab), + format!("_: &{}{}", mutab, ident), + ) + } + _ => unreachable!(), + } } - // Avoid suggesting that `fn foo(HashMap<u32>)` is fixed with a change to - // `fn foo(HashMap: TypeName<u32>)`. - if self.token != token::Lt { - err.span_suggestion( - pat.span, - "if this is a parameter name, give it a type", - format!("{}: TypeName", ident), - Applicability::HasPlaceholders, - ); + _ => { + // Otherwise, try to get a type and emit a suggestion. + if let Some(ty) = pat.to_ty() { + err.span_suggestion_verbose( + pat.span, + "explicitly ignore the parameter name", + format!("_: {}", pprust::ty_to_string(&ty)), + Applicability::MachineApplicable, + ); + err.note(rfc_note); + } + + return None; } + }; + + // `fn foo(a, b) {}`, `fn foo(a<x>, b<y>) {}` or `fn foo(usize, usize) {}` + if first_param { err.span_suggestion( pat.span, - "if this is a type, explicitly ignore the parameter name", - format!("_: {}", ident), - Applicability::MachineApplicable, + "if this is a `self` type, give it a parameter name", + self_sugg, + Applicability::MaybeIncorrect, + ); + } + // Avoid suggesting that `fn foo(HashMap<u32>)` is fixed with a change to + // `fn foo(HashMap: TypeName<u32>)`. + if self.token != token::Lt { + err.span_suggestion( + pat.span, + "if this is a parameter name, give it a type", + param_sugg, + Applicability::HasPlaceholders, ); - err.note("anonymous parameters are removed in the 2018 edition (see RFC 1685)"); - - // Don't attempt to recover by using the `X` in `X<Y>` as the parameter name. - return if self.token == token::Lt { None } else { Some(ident) }; } + err.span_suggestion( + pat.span, + "if this is a type, explicitly ignore the parameter name", + type_sugg, + Applicability::MachineApplicable, + ); + err.note(rfc_note); + + // Don't attempt to recover by using the `X` in `X<Y>` as the parameter name. + return if self.token == token::Lt { None } else { Some(ident) }; } None } diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 28bfaea4555..e155b3fa773 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1,4 +1,4 @@ -use super::pat::{GateOr, RecoverComma, PARAM_EXPECTED}; +use super::pat::{RecoverComma, PARAM_EXPECTED}; use super::ty::{AllowPlus, RecoverQPath, RecoverReturnSign}; use super::{AttrWrapper, BlockMode, ForceCollect, Parser, PathStyle, Restrictions, TokenType}; use super::{SemiColonMode, SeqSep, TokenExpectType, TrailingToken}; @@ -10,7 +10,7 @@ use rustc_ast::tokenstream::Spacing; use rustc_ast::util::classify; use rustc_ast::util::literal::LitError; use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity}; -use rustc_ast::{self as ast, AttrStyle, AttrVec, CaptureBy, Field, Lit, UnOp, DUMMY_NODE_ID}; +use rustc_ast::{self as ast, AttrStyle, AttrVec, CaptureBy, ExprField, Lit, UnOp, DUMMY_NODE_ID}; use rustc_ast::{AnonConst, BinOp, BinOpKind, FnDecl, FnRetTy, MacCall, Param, Ty, TyKind}; use rustc_ast::{Arm, Async, BlockCheckMode, Expr, ExprKind, Label, Movability, RangeLimits}; use rustc_ast_pretty::pprust; @@ -92,7 +92,22 @@ impl<'a> Parser<'a> { self.parse_expr_res(Restrictions::empty(), None) } - pub(super) fn parse_anon_const_expr(&mut self) -> PResult<'a, AnonConst> { + /// Parses an expression, forcing tokens to be collected + pub fn parse_expr_force_collect(&mut self) -> PResult<'a, P<Expr>> { + // If we have outer attributes, then the call to `collect_tokens_trailing_token` + // will be made for us. + if matches!(self.token.kind, TokenKind::Pound | TokenKind::DocComment(..)) { + self.parse_expr() + } else { + // If we don't have outer attributes, then we need to ensure + // that collection happens by using `collect_tokens_no_attrs`. + // Expression don't support custom inner attributes, so `parse_expr` + // will never try to collect tokens if we don't have outer attributes. + self.collect_tokens_no_attrs(|this| this.parse_expr()) + } + } + + pub fn parse_anon_const_expr(&mut self) -> PResult<'a, AnonConst> { self.parse_expr().map(|value| AnonConst { id: DUMMY_NODE_ID, value }) } @@ -1803,7 +1818,7 @@ impl<'a> Parser<'a> { /// The `let` token has already been eaten. fn parse_let_expr(&mut self, attrs: AttrVec) -> PResult<'a, P<Expr>> { let lo = self.prev_token.span; - let pat = self.parse_pat_allow_top_alt(None, GateOr::No, RecoverComma::Yes)?; + let pat = self.parse_pat_allow_top_alt(None, RecoverComma::Yes)?; self.expect(&token::Eq)?; let expr = self.with_res(self.restrictions | Restrictions::NO_STRUCT_LITERAL, |this| { this.parse_assoc_expr_with(1 + prec_let_scrutinee_needs_par(), None.into()) @@ -1866,7 +1881,7 @@ impl<'a> Parser<'a> { _ => None, }; - let pat = self.parse_pat_allow_top_alt(None, GateOr::Yes, RecoverComma::Yes)?; + let pat = self.parse_pat_allow_top_alt(None, RecoverComma::Yes)?; if !self.eat_keyword(kw::In) { self.error_missing_in_for_loop(); } @@ -1973,11 +1988,107 @@ impl<'a> Parser<'a> { Ok(self.mk_expr(lo.to(hi), ExprKind::Match(scrutinee, arms), attrs)) } + /// Attempt to recover from match arm body with statements and no surrounding braces. + fn parse_arm_body_missing_braces( + &mut self, + first_expr: &P<Expr>, + arrow_span: Span, + ) -> Option<P<Expr>> { + if self.token.kind != token::Semi { + return None; + } + let start_snapshot = self.clone(); + let semi_sp = self.token.span; + self.bump(); // `;` + let mut stmts = + vec![self.mk_stmt(first_expr.span, ast::StmtKind::Expr(first_expr.clone()))]; + let err = |this: &mut Parser<'_>, stmts: Vec<ast::Stmt>| { + let span = stmts[0].span.to(stmts[stmts.len() - 1].span); + let mut err = this.struct_span_err(span, "`match` arm body without braces"); + let (these, s, are) = + if stmts.len() > 1 { ("these", "s", "are") } else { ("this", "", "is") }; + err.span_label( + span, + &format!( + "{these} statement{s} {are} not surrounded by a body", + these = these, + s = s, + are = are + ), + ); + err.span_label(arrow_span, "while parsing the `match` arm starting here"); + if stmts.len() > 1 { + err.multipart_suggestion( + &format!("surround the statement{} with a body", s), + vec![ + (span.shrink_to_lo(), "{ ".to_string()), + (span.shrink_to_hi(), " }".to_string()), + ], + Applicability::MachineApplicable, + ); + } else { + err.span_suggestion( + semi_sp, + "use a comma to end a `match` arm expression", + ",".to_string(), + Applicability::MachineApplicable, + ); + } + err.emit(); + this.mk_expr_err(span) + }; + // We might have either a `,` -> `;` typo, or a block without braces. We need + // a more subtle parsing strategy. + loop { + if self.token.kind == token::CloseDelim(token::Brace) { + // We have reached the closing brace of the `match` expression. + return Some(err(self, stmts)); + } + if self.token.kind == token::Comma { + *self = start_snapshot; + return None; + } + let pre_pat_snapshot = self.clone(); + match self.parse_pat_no_top_alt(None) { + Ok(_pat) => { + if self.token.kind == token::FatArrow { + // Reached arm end. + *self = pre_pat_snapshot; + return Some(err(self, stmts)); + } + } + Err(mut err) => { + err.cancel(); + } + } + + *self = pre_pat_snapshot; + match self.parse_stmt_without_recovery(true, ForceCollect::No) { + // Consume statements for as long as possible. + Ok(Some(stmt)) => { + stmts.push(stmt); + } + Ok(None) => { + *self = start_snapshot; + break; + } + // We couldn't parse either yet another statement missing it's + // enclosing block nor the next arm's pattern or closing brace. + Err(mut stmt_err) => { + stmt_err.cancel(); + *self = start_snapshot; + break; + } + } + } + None + } + pub(super) fn parse_arm(&mut self) -> PResult<'a, Arm> { let attrs = self.parse_outer_attributes()?; self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| { let lo = this.token.span; - let pat = this.parse_pat_allow_top_alt(None, GateOr::No, RecoverComma::Yes)?; + let pat = this.parse_pat_allow_top_alt(None, RecoverComma::Yes)?; let guard = if this.eat_keyword(kw::If) { let if_span = this.prev_token.span; let cond = this.parse_expr()?; @@ -2007,6 +2118,21 @@ impl<'a> Parser<'a> { if require_comma { let sm = this.sess.source_map(); + if let Some(body) = this.parse_arm_body_missing_braces(&expr, arrow_span) { + let span = body.span; + return Ok(( + ast::Arm { + attrs, + pat, + guard, + body, + span, + id: DUMMY_NODE_ID, + is_placeholder: false, + }, + TrailingToken::None, + )); + } this.expect_one_of(&[token::Comma], &[token::CloseDelim(token::Brace)]).map_err( |mut err| { match (sm.span_to_lines(expr.span), sm.span_to_lines(arm_start_span)) { @@ -2205,7 +2331,7 @@ impl<'a> Parser<'a> { } let recovery_field = self.find_struct_error_after_field_looking_code(); - let parsed_field = match self.parse_field() { + let parsed_field = match self.parse_expr_field() { Ok(f) => Some(f), Err(mut e) => { if pth == kw::Async { @@ -2262,18 +2388,22 @@ impl<'a> Parser<'a> { let span = pth.span.to(self.token.span); self.expect(&token::CloseDelim(token::Brace))?; - let expr = if recover_async { ExprKind::Err } else { ExprKind::Struct(pth, fields, base) }; + let expr = if recover_async { + ExprKind::Err + } else { + ExprKind::Struct(P(ast::StructExpr { path: pth, fields, rest: base })) + }; Ok(self.mk_expr(span, expr, attrs)) } /// Use in case of error after field-looking code: `S { foo: () with a }`. - fn find_struct_error_after_field_looking_code(&self) -> Option<Field> { + fn find_struct_error_after_field_looking_code(&self) -> Option<ExprField> { match self.token.ident() { Some((ident, is_raw)) if (is_raw || !ident.is_reserved()) && self.look_ahead(1, |t| *t == token::Colon) => { - Some(ast::Field { + Some(ast::ExprField { ident, span: self.token.span, expr: self.mk_expr_err(self.token.span), @@ -2307,7 +2437,7 @@ impl<'a> Parser<'a> { } /// Parses `ident (COLON expr)?`. - fn parse_field(&mut self) -> PResult<'a, Field> { + fn parse_expr_field(&mut self) -> PResult<'a, ExprField> { let attrs = self.parse_outer_attributes()?; self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| { let lo = this.token.span; @@ -2327,7 +2457,7 @@ impl<'a> Parser<'a> { }; Ok(( - ast::Field { + ast::ExprField { ident, span: lo.to(expr.span), expr, @@ -2451,19 +2581,17 @@ impl<'a> Parser<'a> { attrs: AttrWrapper, f: impl FnOnce(&mut Self, Vec<ast::Attribute>) -> PResult<'a, P<Expr>>, ) -> PResult<'a, P<Expr>> { - // FIXME - come up with a nice way to properly forward `ForceCollect`from - // the nonterminal parsing code. TThis approach iscorrect, but will cause - // us to unnecessarily capture tokens for exprs that have only builtin - // attributes. Revisit this before #![feature(stmt_expr_attributes)] is stabilized - let force_collect = if attrs.is_empty() { ForceCollect::No } else { ForceCollect::Yes }; - self.collect_tokens_trailing_token(attrs, force_collect, |this, attrs| { + self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| { let res = f(this, attrs)?; let trailing = if this.restrictions.contains(Restrictions::STMT_EXPR) && this.token.kind == token::Semi { TrailingToken::Semi } else { - TrailingToken::None + // FIXME - pass this through from the place where we know + // we need a comma, rather than assuming that `#[attr] expr,` + // always captures a trailing comma + TrailingToken::MaybeComma }; Ok((res, trailing)) }) diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs index 9668a24bf8a..299b9a959c5 100644 --- a/compiler/rustc_parse/src/parser/item.rs +++ b/compiler/rustc_parse/src/parser/item.rs @@ -9,7 +9,7 @@ use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree}; use rustc_ast::{self as ast, AttrVec, Attribute, DUMMY_NODE_ID}; use rustc_ast::{Async, Const, Defaultness, IsAuto, Mutability, Unsafe, UseTree, UseTreeKind}; use rustc_ast::{BindingMode, Block, FnDecl, FnSig, Param, SelfKind}; -use rustc_ast::{EnumDef, Generics, StructField, TraitRef, Ty, TyKind, Variant, VariantData}; +use rustc_ast::{EnumDef, FieldDef, Generics, TraitRef, Ty, TyKind, Variant, VariantData}; use rustc_ast::{FnHeader, ForeignItem, Path, PathSegment, Visibility, VisibilityKind}; use rustc_ast::{MacArgs, MacCall, MacDelimiter}; use rustc_ast_pretty::pprust; @@ -103,20 +103,11 @@ impl<'a> Parser<'a> { // over when we bump the parser if let token::Interpolated(nt) = &self.token.kind { if let token::NtItem(item) = &**nt { - let item = item.clone(); - - return self.collect_tokens_trailing_token( - attrs, - force_collect, - |this, mut attrs| { - let mut item = item; - mem::swap(&mut item.attrs, &mut attrs); - item.attrs.extend(attrs); - // Bump the parser so the we capture the token::Interpolated - this.bump(); - Ok((Some(item.into_inner()), TrailingToken::None)) - }, - ); + let mut item = item.clone(); + self.bump(); + + attrs.prepend_to_nt_inner(&mut item.attrs); + return Ok(Some(item.into_inner())); } }; @@ -204,6 +195,7 @@ impl<'a> Parser<'a> { def: &mut Defaultness, req_name: ReqName, ) -> PResult<'a, Option<ItemInfo>> { + let def_final = def == &Defaultness::Final; let mut def = || mem::replace(def, Defaultness::Final); let info = if self.eat_keyword(kw::Use) { @@ -226,7 +218,7 @@ impl<'a> Parser<'a> { } (Ident::invalid(), ItemKind::Use(tree)) - } else if self.check_fn_front_matter() { + } else if self.check_fn_front_matter(def_final) { // FUNCTION ITEM let (ident, sig, generics, body) = self.parse_fn(attrs, req_name, lo)?; (ident, ItemKind::Fn(box FnKind(def(), sig, generics, body))) @@ -529,7 +521,7 @@ impl<'a> Parser<'a> { generics.where_clause = self.parse_where_clause()?; - let impl_items = self.parse_item_list(attrs, |p| p.parse_impl_item())?; + let impl_items = self.parse_item_list(attrs, |p| p.parse_impl_item(ForceCollect::No))?; let item_kind = match ty_second { Some(ty_second) => { @@ -717,22 +709,32 @@ impl<'a> Parser<'a> { } else { // It's a normal trait. tps.where_clause = self.parse_where_clause()?; - let items = self.parse_item_list(attrs, |p| p.parse_trait_item())?; + let items = self.parse_item_list(attrs, |p| p.parse_trait_item(ForceCollect::No))?; Ok((ident, ItemKind::Trait(box TraitKind(is_auto, unsafety, tps, bounds, items)))) } } - pub fn parse_impl_item(&mut self) -> PResult<'a, Option<Option<P<AssocItem>>>> { - self.parse_assoc_item(|_| true) + pub fn parse_impl_item( + &mut self, + force_collect: ForceCollect, + ) -> PResult<'a, Option<Option<P<AssocItem>>>> { + self.parse_assoc_item(|_| true, force_collect) } - pub fn parse_trait_item(&mut self) -> PResult<'a, Option<Option<P<AssocItem>>>> { - self.parse_assoc_item(|edition| edition >= Edition::Edition2018) + pub fn parse_trait_item( + &mut self, + force_collect: ForceCollect, + ) -> PResult<'a, Option<Option<P<AssocItem>>>> { + self.parse_assoc_item(|edition| edition >= Edition::Edition2018, force_collect) } /// Parses associated items. - fn parse_assoc_item(&mut self, req_name: ReqName) -> PResult<'a, Option<Option<P<AssocItem>>>> { - Ok(self.parse_item_(req_name, ForceCollect::No)?.map( + fn parse_assoc_item( + &mut self, + req_name: ReqName, + force_collect: ForceCollect, + ) -> PResult<'a, Option<Option<P<AssocItem>>>> { + Ok(self.parse_item_(req_name, force_collect)?.map( |Item { attrs, id, span, vis, ident, kind, tokens }| { let kind = match AssocItemKind::try_from(kind) { Ok(kind) => kind, @@ -917,14 +919,17 @@ impl<'a> Parser<'a> { unsafety: Unsafe, ) -> PResult<'a, ItemInfo> { let abi = self.parse_abi(); // ABI? - let items = self.parse_item_list(attrs, |p| p.parse_foreign_item())?; + let items = self.parse_item_list(attrs, |p| p.parse_foreign_item(ForceCollect::No))?; let module = ast::ForeignMod { unsafety, abi, items }; Ok((Ident::invalid(), ItemKind::ForeignMod(module))) } /// Parses a foreign item (one in an `extern { ... }` block). - pub fn parse_foreign_item(&mut self) -> PResult<'a, Option<Option<P<ForeignItem>>>> { - Ok(self.parse_item_(|_| true, ForceCollect::No)?.map( + pub fn parse_foreign_item( + &mut self, + force_collect: ForceCollect, + ) -> PResult<'a, Option<Option<P<ForeignItem>>>> { + Ok(self.parse_item_(|_| true, force_collect)?.map( |Item { attrs, id, span, vis, ident, kind, tokens }| { let kind = match ForeignItemKind::try_from(kind) { Ok(kind) => kind, @@ -1231,14 +1236,12 @@ impl<'a> Parser<'a> { Ok((class_name, ItemKind::Union(vdata, generics))) } - fn parse_record_struct_body( - &mut self, - ) -> PResult<'a, (Vec<StructField>, /* recovered */ bool)> { + fn parse_record_struct_body(&mut self) -> PResult<'a, (Vec<FieldDef>, /* recovered */ bool)> { let mut fields = Vec::new(); let mut recovered = false; if self.eat(&token::OpenDelim(token::Brace)) { while self.token != token::CloseDelim(token::Brace) { - let field = self.parse_struct_decl_field().map_err(|e| { + let field = self.parse_field_def().map_err(|e| { self.consume_block(token::Brace, ConsumeClosingDelim::No); recovered = true; e @@ -1263,7 +1266,7 @@ impl<'a> Parser<'a> { Ok((fields, recovered)) } - fn parse_tuple_struct_body(&mut self) -> PResult<'a, Vec<StructField>> { + fn parse_tuple_struct_body(&mut self) -> PResult<'a, Vec<FieldDef>> { // This is the case where we find `struct Foo<T>(T) where T: Copy;` // Unit like structs are handled in parse_item_struct function self.parse_paren_comma_seq(|p| { @@ -1274,7 +1277,7 @@ impl<'a> Parser<'a> { let ty = p.parse_ty()?; Ok(( - StructField { + FieldDef { span: lo.to(ty.span), vis, ident: None, @@ -1291,7 +1294,7 @@ impl<'a> Parser<'a> { } /// Parses an element of a struct declaration. - fn parse_struct_decl_field(&mut self) -> PResult<'a, StructField> { + fn parse_field_def(&mut self) -> PResult<'a, FieldDef> { let attrs = self.parse_outer_attributes()?; self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| { let lo = this.token.span; @@ -1306,7 +1309,7 @@ impl<'a> Parser<'a> { lo: Span, vis: Visibility, attrs: Vec<Attribute>, - ) -> PResult<'a, StructField> { + ) -> PResult<'a, FieldDef> { let mut seen_comma: bool = false; let a_var = self.parse_name_and_ty(lo, vis, attrs)?; if self.token == token::Comma { @@ -1398,11 +1401,11 @@ impl<'a> Parser<'a> { lo: Span, vis: Visibility, attrs: Vec<Attribute>, - ) -> PResult<'a, StructField> { + ) -> PResult<'a, FieldDef> { let name = self.parse_ident_common(false)?; self.expect(&token::Colon)?; let ty = self.parse_ty()?; - Ok(StructField { + Ok(FieldDef { span: lo.to(self.prev_token.span), ident: Some(name), vis, @@ -1445,7 +1448,7 @@ impl<'a> Parser<'a> { Ok((ident, ItemKind::MacroDef(ast::MacroDef { body, macro_rules: false }))) } - /// Is this unambiguously the start of a `macro_rules! foo` item defnition? + /// Is this unambiguously the start of a `macro_rules! foo` item definition? fn is_macro_rules_item(&mut self) -> bool { self.check_keyword(kw::MacroRules) && self.look_ahead(1, |t| *t == token::Not) @@ -1636,18 +1639,27 @@ impl<'a> Parser<'a> { } /// Is the current token the start of an `FnHeader` / not a valid parse? - pub(super) fn check_fn_front_matter(&mut self) -> bool { + /// + /// `check_pub` adds additional `pub` to the checks in case users place it + /// wrongly, can be used to ensure `pub` never comes after `default`. + pub(super) fn check_fn_front_matter(&mut self, check_pub: bool) -> bool { // We use an over-approximation here. // `const const`, `fn const` won't parse, but we're not stepping over other syntax either. - const QUALS: [Symbol; 4] = [kw::Const, kw::Async, kw::Unsafe, kw::Extern]; + // `pub` is added in case users got confused with the ordering like `async pub fn`, + // only if it wasn't preceeded by `default` as `default pub` is invalid. + let quals: &[Symbol] = if check_pub { + &[kw::Pub, kw::Const, kw::Async, kw::Unsafe, kw::Extern] + } else { + &[kw::Const, kw::Async, kw::Unsafe, kw::Extern] + }; self.check_keyword(kw::Fn) // Definitely an `fn`. // `$qual fn` or `$qual $qual`: - || QUALS.iter().any(|&kw| self.check_keyword(kw)) + || quals.iter().any(|&kw| self.check_keyword(kw)) && self.look_ahead(1, |t| { // `$qual fn`, e.g. `const fn` or `async fn`. t.is_keyword(kw::Fn) // Two qualifiers `$qual $qual` is enough, e.g. `async unsafe`. - || t.is_non_raw_ident_where(|i| QUALS.contains(&i.name) + || t.is_non_raw_ident_where(|i| quals.contains(&i.name) // Rule out 2015 `const async: T = val`. && i.is_reserved() // Rule out unsafe extern block. @@ -1668,6 +1680,7 @@ impl<'a> Parser<'a> { /// FnFrontMatter = FnQual "fn" ; /// ``` pub(super) fn parse_fn_front_matter(&mut self) -> PResult<'a, FnHeader> { + let sp_start = self.token.span; let constness = self.parse_constness(); let asyncness = self.parse_asyncness(); let unsafety = self.parse_unsafety(); @@ -1681,8 +1694,27 @@ impl<'a> Parser<'a> { // It is possible for `expect_one_of` to recover given the contents of // `self.expected_tokens`, therefore, do not use `self.unexpected()` which doesn't // account for this. - if !self.expect_one_of(&[], &[])? { - unreachable!() + match self.expect_one_of(&[], &[]) { + Ok(true) => {} + Ok(false) => unreachable!(), + Err(mut err) => { + // Recover incorrect visibility order such as `async pub`. + if self.check_keyword(kw::Pub) { + let sp = sp_start.to(self.prev_token.span); + if let Ok(snippet) = self.span_to_snippet(sp) { + let vis = self.parse_visibility(FollowedByType::No)?; + let vs = pprust::vis_to_string(&vis); + let vs = vs.trim_end(); + err.span_suggestion( + sp_start.to(self.prev_token.span), + &format!("visibility `{}` must come before `{}`", vs, snippet), + format!("{} {}", vs, snippet), + Applicability::MachineApplicable, + ); + } + } + return Err(err); + } } } @@ -1757,8 +1789,9 @@ impl<'a> Parser<'a> { let (pat, ty) = if is_name_required || this.is_named_param() { debug!("parse_param_general parse_pat (is_name_required:{})", is_name_required); - let pat = this.parse_fn_param_pat()?; - if let Err(mut err) = this.expect(&token::Colon) { + let (pat, colon) = this.parse_fn_param_pat_colon()?; + if !colon { + let mut err = this.unexpected::<()>().unwrap_err(); return if let Some(ident) = this.parameter_without_type(&mut err, pat, is_name_required, first_param) { diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 4cc2224d27e..ed95a5661b1 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -14,18 +14,21 @@ use crate::lexer::UnmatchedBrace; pub use attr_wrapper::AttrWrapper; pub use diagnostics::AttemptLocalParseRecovery; use diagnostics::Error; -pub use pat::{GateOr, RecoverComma}; +pub use pat::RecoverComma; pub use path::PathStyle; use rustc_ast::ptr::P; use rustc_ast::token::{self, DelimToken, Token, TokenKind}; +use rustc_ast::tokenstream::AttributesData; use rustc_ast::tokenstream::{self, DelimSpan, Spacing}; -use rustc_ast::tokenstream::{TokenStream, TokenTree, TreeAndSpacing}; +use rustc_ast::tokenstream::{TokenStream, TokenTree}; +use rustc_ast::AttrId; use rustc_ast::DUMMY_NODE_ID; use rustc_ast::{self as ast, AnonConst, AstLike, AttrStyle, AttrVec, Const, CrateSugar, Extern}; use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit, Unsafe}; use rustc_ast::{Visibility, VisibilityKind}; use rustc_ast_pretty::pprust; +use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::Lrc; use rustc_errors::PResult; use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError}; @@ -34,6 +37,7 @@ use rustc_span::source_map::{Span, DUMMY_SP}; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use tracing::debug; +use std::ops::Range; use std::{cmp, mem, slice}; bitflags::bitflags! { @@ -64,6 +68,7 @@ pub enum ForceCollect { No, } +#[derive(Debug, Eq, PartialEq)] pub enum TrailingToken { None, Semi, @@ -111,6 +116,7 @@ pub struct Parser<'a> { pub token_spacing: Spacing, /// The previous token. pub prev_token: Token, + pub capture_cfg: bool, restrictions: Restrictions, expected_tokens: Vec<TokenType>, // Important: This must only be advanced from `next_tok` @@ -134,6 +140,44 @@ pub struct Parser<'a> { pub last_type_ascription: Option<(Span, bool /* likely path typo */)>, /// If present, this `Parser` is not parsing Rust code but rather a macro call. subparser_name: Option<&'static str>, + capture_state: CaptureState, +} + +/// Indicates a range of tokens that should be replaced by +/// the tokens in the provided vector. This is used in two +/// places during token collection: +/// +/// 1. During the parsing of an AST node that may have a `#[derive]` +/// attribute, we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]` +/// In this case, we use a `ReplaceRange` to replace the entire inner AST node +/// with `FlatToken::AttrTarget`, allowing us to perform eager cfg-expansion +/// on a `AttrAnnotatedTokenStream` +/// +/// 2. When we parse an inner attribute while collecting tokens. We +/// remove inner attributes from the token stream entirely, and +/// instead track them through the `attrs` field on the AST node. +/// This allows us to easily manipulate them (for example, removing +/// the first macro inner attribute to invoke a proc-macro). +/// When create a `TokenStream`, the inner attributes get inserted +/// into the proper place in the token stream. +pub type ReplaceRange = (Range<u32>, Vec<(FlatToken, Spacing)>); + +/// Controls how we capture tokens. Capturing can be expensive, +/// so we try to avoid performing capturing in cases where +/// we will never need a `AttrAnnotatedTokenStream` +#[derive(Copy, Clone)] +pub enum Capturing { + /// We aren't performing any capturing - this is the default mode. + No, + /// We are capturing tokens + Yes, +} + +#[derive(Clone)] +struct CaptureState { + capturing: Capturing, + replace_ranges: Vec<ReplaceRange>, + inner_attr_ranges: FxHashMap<AttrId, ReplaceRange>, } impl<'a> Drop for Parser<'a> { @@ -167,11 +211,11 @@ struct TokenCursor { // want to capture just the first 'unglued' token. // For example, capturing the `Vec<u8>` // in `Option<Vec<u8>>` requires us to unglue - // the trailing `>>` token. The `append_unglued_token` + // the trailing `>>` token. The `break_last_token` // field is used to track this token - it gets // appended to the captured stream when // we evaluate a `LazyTokenStream` - append_unglued_token: Option<TreeAndSpacing>, + break_last_token: bool, } #[derive(Clone)] @@ -188,9 +232,9 @@ impl TokenCursorFrame { TokenCursorFrame { delim, span, - open_delim: delim == token::NoDelim, + open_delim: false, tree_cursor: tts.into_trees(), - close_delim: delim == token::NoDelim, + close_delim: false, } } } @@ -364,19 +408,24 @@ impl<'a> Parser<'a> { desugar_doc_comments: bool, subparser_name: Option<&'static str>, ) -> Self { + let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens); + start_frame.open_delim = true; + start_frame.close_delim = true; + let mut parser = Parser { sess, token: Token::dummy(), token_spacing: Spacing::Alone, prev_token: Token::dummy(), + capture_cfg: false, restrictions: Restrictions::empty(), expected_tokens: Vec::new(), token_cursor: TokenCursor { - frame: TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens), + frame: start_frame, stack: Vec::new(), num_next_calls: 0, desugar_doc_comments, - append_unglued_token: None, + break_last_token: false, }, desugar_doc_comments, unmatched_angle_bracket_count: 0, @@ -385,6 +434,11 @@ impl<'a> Parser<'a> { last_unexpected_token_span: None, last_type_ascription: None, subparser_name, + capture_state: CaptureState { + capturing: Capturing::No, + replace_ranges: Vec::new(), + inner_attr_ranges: Default::default(), + }, }; // Make parser point to the first token. @@ -394,21 +448,29 @@ impl<'a> Parser<'a> { } fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) { - let (mut next, spacing) = if self.desugar_doc_comments { - self.token_cursor.next_desugared() - } else { - self.token_cursor.next() - }; - self.token_cursor.num_next_calls += 1; - // We've retrieved an token from the underlying - // cursor, so we no longer need to worry about - // an unglued token. See `break_and_eat` for more details - self.token_cursor.append_unglued_token = None; - if next.span.is_dummy() { - // Tweak the location for better diagnostics, but keep syntactic context intact. - next.span = fallback_span.with_ctxt(next.span.ctxt()); + loop { + let (mut next, spacing) = if self.desugar_doc_comments { + self.token_cursor.next_desugared() + } else { + self.token_cursor.next() + }; + self.token_cursor.num_next_calls += 1; + // We've retrieved an token from the underlying + // cursor, so we no longer need to worry about + // an unglued token. See `break_and_eat` for more details + self.token_cursor.break_last_token = false; + if next.span.is_dummy() { + // Tweak the location for better diagnostics, but keep syntactic context intact. + next.span = fallback_span.with_ctxt(next.span.ctxt()); + } + if matches!( + next.kind, + token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) + ) { + continue; + } + return (next, spacing); } - (next, spacing) } pub fn unexpected<T>(&mut self) -> PResult<'a, T> { @@ -606,8 +668,7 @@ impl<'a> Parser<'a> { // If we consume any additional tokens, then this token // is not needed (we'll capture the entire 'glued' token), // and `next_tok` will set this field to `None` - self.token_cursor.append_unglued_token = - Some((TokenTree::Token(self.token.clone()), Spacing::Alone)); + self.token_cursor.break_last_token = true; // Use the spacing of the glued token as the spacing // of the unglued second token. self.bump_with((Token::new(second, second_span), self.token_spacing)); @@ -688,6 +749,8 @@ impl<'a> Parser<'a> { let mut recovered = false; let mut trailing = false; let mut v = vec![]; + let unclosed_delims = !self.unclosed_delims.is_empty(); + while !self.expect_any_with_type(kets, expect) { if let token::CloseDelim(..) | token::Eof = self.token.kind { break; @@ -708,7 +771,7 @@ impl<'a> Parser<'a> { // Attempt to keep parsing if it was a similar separator. if let Some(ref tokens) = t.similar_tokens() { - if tokens.contains(&self.token.kind) { + if tokens.contains(&self.token.kind) && !unclosed_delims { self.bump(); } } @@ -867,15 +930,38 @@ impl<'a> Parser<'a> { } let frame = &self.token_cursor.frame; - match frame.tree_cursor.look_ahead(dist - 1) { - Some(tree) => match tree { - TokenTree::Token(token) => looker(token), - TokenTree::Delimited(dspan, delim, _) => { - looker(&Token::new(token::OpenDelim(*delim), dspan.open)) - } - }, - None => looker(&Token::new(token::CloseDelim(frame.delim), frame.span.close)), + if frame.delim != DelimToken::NoDelim { + let all_normal = (0..dist).all(|i| { + let token = frame.tree_cursor.look_ahead(i); + !matches!(token, Some(TokenTree::Delimited(_, DelimToken::NoDelim, _))) + }); + if all_normal { + return match frame.tree_cursor.look_ahead(dist - 1) { + Some(tree) => match tree { + TokenTree::Token(token) => looker(token), + TokenTree::Delimited(dspan, delim, _) => { + looker(&Token::new(token::OpenDelim(*delim), dspan.open)) + } + }, + None => looker(&Token::new(token::CloseDelim(frame.delim), frame.span.close)), + }; + } } + + let mut cursor = self.token_cursor.clone(); + let mut i = 0; + let mut token = Token::dummy(); + while i < dist { + token = cursor.next().0; + if matches!( + token.kind, + token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) + ) { + continue; + } + i += 1; + } + return looker(&token); } /// Returns whether any of the given keywords are `dist` tokens ahead of the current one. @@ -987,7 +1073,7 @@ impl<'a> Parser<'a> { } // Collect tokens because they are used during lowering to HIR. - let expr = self.collect_tokens_no_attrs(|this| this.parse_expr())?; + let expr = self.parse_expr_force_collect()?; let span = expr.span; match &expr.kind { @@ -1287,3 +1373,24 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa } } } + +/// A helper struct used when building a `AttrAnnotatedTokenStream` from +/// a `LazyTokenStream`. Both delimiter and non-delimited tokens +/// are stored as `FlatToken::Token`. A vector of `FlatToken`s +/// is then 'parsed' to build up a `AttrAnnotatedTokenStream` with nested +/// `AttrAnnotatedTokenTree::Delimited` tokens +#[derive(Debug, Clone)] +pub enum FlatToken { + /// A token - this holds both delimiter (e.g. '{' and '}') + /// and non-delimiter tokens + Token(Token), + /// Holds the `AttributesData` for an AST node. The + /// `AttributesData` is inserted directly into the + /// constructed `AttrAnnotatedTokenStream` as + /// a `AttrAnnotatedTokenTree::Attributes` + AttrTarget(AttributesData), + /// A special 'empty' token that is ignored during the conversion + /// to a `AttrAnnotatedTokenStream`. This is used to simplify the + /// handling of replace ranges. + Empty, +} diff --git a/compiler/rustc_parse/src/parser/nonterminal.rs b/compiler/rustc_parse/src/parser/nonterminal.rs index a84ae515144..5c4a2785d6e 100644 --- a/compiler/rustc_parse/src/parser/nonterminal.rs +++ b/compiler/rustc_parse/src/parser/nonterminal.rs @@ -4,7 +4,7 @@ use rustc_ast_pretty::pprust; use rustc_errors::PResult; use rustc_span::symbol::{kw, Ident}; -use crate::parser::pat::{GateOr, RecoverComma}; +use crate::parser::pat::RecoverComma; use crate::parser::{FollowedByType, ForceCollect, Parser, PathStyle}; impl<'a> Parser<'a> { @@ -61,7 +61,7 @@ impl<'a> Parser<'a> { }, _ => false, }, - NonterminalKind::Pat2018 { .. } | NonterminalKind::Pat2021 { .. } => match token.kind { + NonterminalKind::Pat2015 { .. } | NonterminalKind::Pat2021 { .. } => match token.kind { token::Ident(..) | // box, ref, mut, and other identifiers (can stricten) token::OpenDelim(token::Paren) | // tuple pattern token::OpenDelim(token::Bracket) | // slice pattern @@ -118,32 +118,17 @@ impl<'a> Parser<'a> { return Err(self.struct_span_err(self.token.span, "expected a statement")); } }, - NonterminalKind::Pat2018 { .. } | NonterminalKind::Pat2021 { .. } => { + NonterminalKind::Pat2015 { .. } | NonterminalKind::Pat2021 { .. } => { token::NtPat(self.collect_tokens_no_attrs(|this| match kind { - NonterminalKind::Pat2018 { .. } => this.parse_pat_no_top_alt(None), + NonterminalKind::Pat2015 { .. } => this.parse_pat_no_top_alt(None), NonterminalKind::Pat2021 { .. } => { - this.parse_pat_allow_top_alt(None, GateOr::Yes, RecoverComma::No) + this.parse_pat_allow_top_alt(None, RecoverComma::No) } _ => unreachable!(), })?) } - // If there are attributes present, then `parse_expr` will end up collecting tokens, - // turning the outer `collect_tokens_no_attrs` into a no-op due to the already present - // tokens. If there are *not* attributes present, then the outer - // `collect_tokens_no_attrs` will ensure that we will end up collecting tokens for the - // expressions. - // - // This is less efficient than it could be, since the outer `collect_tokens_no_attrs` - // still needs to snapshot the `TokenCursor` before calling `parse_expr`, even when - // `parse_expr` will end up collecting tokens. Ideally, this would work more like - // `parse_item`, and take in a `ForceCollect` parameter. However, this would require - // adding a `ForceCollect` parameter in a bunch of places in expression parsing - // for little gain. If the perf impact from this turns out to be noticeable, we should - // revisit this apporach. - NonterminalKind::Expr => { - token::NtExpr(self.collect_tokens_no_attrs(|this| this.parse_expr())?) - } + NonterminalKind::Expr => token::NtExpr(self.parse_expr_force_collect()?), NonterminalKind::Literal => { // The `:literal` matcher does not support attributes token::NtLiteral( @@ -168,9 +153,7 @@ impl<'a> Parser<'a> { NonterminalKind::Path => token::NtPath( self.collect_tokens_no_attrs(|this| this.parse_path(PathStyle::Type))?, ), - NonterminalKind::Meta => { - token::NtMeta(P(self.collect_tokens_no_attrs(|this| this.parse_attr_item(false))?)) - } + NonterminalKind::Meta => token::NtMeta(P(self.parse_attr_item(true)?)), NonterminalKind::TT => token::NtTT(self.parse_token_tree()), NonterminalKind::Vis => token::NtVis( self.collect_tokens_no_attrs(|this| this.parse_visibility(FollowedByType::Yes))?, diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index 9e2e7359ca9..0abefbd6a12 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -3,7 +3,7 @@ use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole}; use rustc_ast::mut_visit::{noop_visit_pat, MutVisitor}; use rustc_ast::ptr::P; use rustc_ast::token; -use rustc_ast::{self as ast, AttrVec, Attribute, FieldPat, MacCall, Pat, PatKind, RangeEnd}; +use rustc_ast::{self as ast, AttrVec, Attribute, MacCall, Pat, PatField, PatKind, RangeEnd}; use rustc_ast::{BindingMode, Expr, ExprKind, Mutability, Path, QSelf, RangeSyntax}; use rustc_ast_pretty::pprust; use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, PResult}; @@ -17,13 +17,6 @@ pub(super) const PARAM_EXPECTED: Expected = Some("parameter name"); const WHILE_PARSING_OR_MSG: &str = "while parsing this or-pattern starting here"; -/// Whether or not an or-pattern should be gated when occurring in the current context. -#[derive(PartialEq, Clone, Copy)] -pub enum GateOr { - Yes, - No, -} - /// Whether or not to recover a `,` when parsing or-patterns. #[derive(PartialEq, Copy, Clone)] pub enum RecoverComma { @@ -31,6 +24,18 @@ pub enum RecoverComma { No, } +/// The result of `eat_or_separator`. We want to distinguish which case we are in to avoid +/// emitting duplicate diagnostics. +#[derive(Debug, Clone, Copy)] +enum EatOrResult { + /// We recovered from a trailing vert. + TrailingVert, + /// We ate an `|` (or `||` and recovered). + AteOr, + /// We did not eat anything (i.e. the current token is not `|` or `||`). + None, +} + impl<'a> Parser<'a> { /// Parses a pattern. /// @@ -52,16 +57,31 @@ impl<'a> Parser<'a> { pub fn parse_pat_allow_top_alt( &mut self, expected: Expected, - gate_or: GateOr, rc: RecoverComma, ) -> PResult<'a, P<Pat>> { + self.parse_pat_allow_top_alt_inner(expected, rc).map(|(pat, _)| pat) + } + + /// Returns the pattern and a bool indicating whether we recovered from a trailing vert (true = + /// recovered). + fn parse_pat_allow_top_alt_inner( + &mut self, + expected: Expected, + rc: RecoverComma, + ) -> PResult<'a, (P<Pat>, bool)> { + // Keep track of whether we recovered from a trailing vert so that we can avoid duplicated + // suggestions (which bothers rustfix). + // // Allow a '|' before the pats (RFCs 1925, 2530, and 2535). - let leading_vert_span = - if self.eat_or_separator(None) { Some(self.prev_token.span) } else { None }; + let (leading_vert_span, mut trailing_vert) = match self.eat_or_separator(None) { + EatOrResult::AteOr => (Some(self.prev_token.span), false), + EatOrResult::TrailingVert => (None, true), + EatOrResult::None => (None, false), + }; // Parse the first pattern (`p_0`). let first_pat = self.parse_pat_no_top_alt(expected)?; - self.maybe_recover_unexpected_comma(first_pat.span, rc, gate_or)?; + self.maybe_recover_unexpected_comma(first_pat.span, rc)?; // If the next token is not a `|`, // this is not an or-pattern and we should exit here. @@ -70,46 +90,92 @@ impl<'a> Parser<'a> { // then we should really gate the leading `|`. // This complicated procedure is done purely for diagnostics UX. if let Some(leading_vert_span) = leading_vert_span { - if gate_or == GateOr::Yes && self.sess.gated_spans.is_ungated(sym::or_patterns) { - self.sess.gated_spans.gate(sym::or_patterns, leading_vert_span); - } - // If there was a leading vert, treat this as an or-pattern. This improves // diagnostics. let span = leading_vert_span.to(self.prev_token.span); - return Ok(self.mk_pat(span, PatKind::Or(vec![first_pat]))); + return Ok((self.mk_pat(span, PatKind::Or(vec![first_pat])), trailing_vert)); } - return Ok(first_pat); + return Ok((first_pat, trailing_vert)); } // Parse the patterns `p_1 | ... | p_n` where `n > 0`. let lo = leading_vert_span.unwrap_or(first_pat.span); let mut pats = vec![first_pat]; - while self.eat_or_separator(Some(lo)) { + loop { + match self.eat_or_separator(Some(lo)) { + EatOrResult::AteOr => {} + EatOrResult::None => break, + EatOrResult::TrailingVert => { + trailing_vert = true; + break; + } + } let pat = self.parse_pat_no_top_alt(expected).map_err(|mut err| { err.span_label(lo, WHILE_PARSING_OR_MSG); err })?; - self.maybe_recover_unexpected_comma(pat.span, rc, gate_or)?; + self.maybe_recover_unexpected_comma(pat.span, rc)?; pats.push(pat); } let or_pattern_span = lo.to(self.prev_token.span); - // Feature gate the or-pattern if instructed: - if gate_or == GateOr::Yes { - self.sess.gated_spans.gate(sym::or_patterns, or_pattern_span); + Ok((self.mk_pat(or_pattern_span, PatKind::Or(pats)), trailing_vert)) + } + + /// Parse a pattern and (maybe) a `Colon` in positions where a pattern may be followed by a + /// type annotation (e.g. for `let` bindings or `fn` params). + /// + /// Generally, this corresponds to `pat_no_top_alt` followed by an optional `Colon`. It will + /// eat the `Colon` token if one is present. + /// + /// The return value represents the parsed pattern and `true` if a `Colon` was parsed (`false` + /// otherwise). + pub(super) fn parse_pat_before_ty( + &mut self, + expected: Expected, + rc: RecoverComma, + syntax_loc: &str, + ) -> PResult<'a, (P<Pat>, bool)> { + // We use `parse_pat_allow_top_alt` regardless of whether we actually want top-level + // or-patterns so that we can detect when a user tries to use it. This allows us to print a + // better error message. + let (pat, trailing_vert) = self.parse_pat_allow_top_alt_inner(expected, rc)?; + let colon = self.eat(&token::Colon); + + if let PatKind::Or(pats) = &pat.kind { + let msg = format!("top-level or-patterns are not allowed in {}", syntax_loc); + let (help, fix) = if pats.len() == 1 { + // If all we have is a leading vert, then print a special message. This is the case + // if `parse_pat_allow_top_alt` returns an or-pattern with one variant. + let msg = "remove the `|`"; + let fix = pprust::pat_to_string(&pat); + (msg, fix) + } else { + let msg = "wrap the pattern in parentheses"; + let fix = format!("({})", pprust::pat_to_string(&pat)); + (msg, fix) + }; + + if trailing_vert { + // We already emitted an error and suggestion to remove the trailing vert. Don't + // emit again. + self.sess.span_diagnostic.delay_span_bug(pat.span, &msg); + } else { + self.struct_span_err(pat.span, &msg) + .span_suggestion(pat.span, help, fix, Applicability::MachineApplicable) + .emit(); + } } - Ok(self.mk_pat(or_pattern_span, PatKind::Or(pats))) + Ok((pat, colon)) } - /// Parse the pattern for a function or function pointer parameter. - pub(super) fn parse_fn_param_pat(&mut self) -> PResult<'a, P<Pat>> { - // We actually do _not_ allow top-level or-patterns in function params, but we use - // `parse_pat_allow_top_alt` anyway so that we can detect when a user tries to use it. This - // allows us to print a better error message. - // + /// Parse the pattern for a function or function pointer parameter, followed by a colon. + /// + /// The return value represents the parsed pattern and `true` if a `Colon` was parsed (`false` + /// otherwise). + pub(super) fn parse_fn_param_pat_colon(&mut self) -> PResult<'a, (P<Pat>, bool)> { // In order to get good UX, we first recover in the case of a leading vert for an illegal // top-level or-pat. Normally, this means recovering both `|` and `||`, but in this case, // a leading `||` probably doesn't indicate an or-pattern attempt, so we handle that @@ -128,53 +194,23 @@ impl<'a> Parser<'a> { self.bump(); } - let pat = self.parse_pat_allow_top_alt(PARAM_EXPECTED, GateOr::No, RecoverComma::No)?; - - if let PatKind::Or(..) = &pat.kind { - self.ban_illegal_fn_param_or_pat(&pat); - } - - Ok(pat) - } - - /// Ban `A | B` immediately in a parameter pattern and suggest wrapping in parens. - fn ban_illegal_fn_param_or_pat(&self, pat: &Pat) { - // If all we have a leading vert, then print a special message. This is the case if - // `parse_pat_allow_top_alt` returns an or-pattern with one variant. - let (msg, fix) = match &pat.kind { - PatKind::Or(pats) if pats.len() == 1 => { - let msg = "remove the leading `|`"; - let fix = pprust::pat_to_string(pat); - (msg, fix) - } - - _ => { - let msg = "wrap the pattern in parentheses"; - let fix = format!("({})", pprust::pat_to_string(pat)); - (msg, fix) - } - }; - - self.struct_span_err(pat.span, "an or-pattern parameter must be wrapped in parentheses") - .span_suggestion(pat.span, msg, fix, Applicability::MachineApplicable) - .emit(); + self.parse_pat_before_ty(PARAM_EXPECTED, RecoverComma::No, "function parameters") } /// Eat the or-pattern `|` separator. /// If instead a `||` token is encountered, recover and pretend we parsed `|`. - fn eat_or_separator(&mut self, lo: Option<Span>) -> bool { + fn eat_or_separator(&mut self, lo: Option<Span>) -> EatOrResult { if self.recover_trailing_vert(lo) { - return false; - } - - match self.token.kind { - token::OrOr => { - // Found `||`; Recover and pretend we parsed `|`. - self.ban_unexpected_or_or(lo); - self.bump(); - true - } - _ => self.eat(&token::BinOp(token::Or)), + EatOrResult::TrailingVert + } else if matches!(self.token.kind, token::OrOr) { + // Found `||`; Recover and pretend we parsed `|`. + self.ban_unexpected_or_or(lo); + self.bump(); + EatOrResult::AteOr + } else if self.eat(&token::BinOp(token::Or)) { + EatOrResult::AteOr + } else { + EatOrResult::None } } @@ -190,14 +226,14 @@ impl<'a> Parser<'a> { matches!( &token.uninterpolate().kind, token::FatArrow // e.g. `a | => 0,`. - | token::Ident(kw::If, false) // e.g. `a | if expr`. - | token::Eq // e.g. `let a | = 0`. - | token::Semi // e.g. `let a |;`. - | token::Colon // e.g. `let a | :`. - | token::Comma // e.g. `let (a |,)`. - | token::CloseDelim(token::Bracket) // e.g. `let [a | ]`. - | token::CloseDelim(token::Paren) // e.g. `let (a | )`. - | token::CloseDelim(token::Brace) // e.g. `let A { f: a | }`. + | token::Ident(kw::If, false) // e.g. `a | if expr`. + | token::Eq // e.g. `let a | = 0`. + | token::Semi // e.g. `let a |;`. + | token::Colon // e.g. `let a | :`. + | token::Comma // e.g. `let (a |,)`. + | token::CloseDelim(token::Bracket) // e.g. `let [a | ]`. + | token::CloseDelim(token::Paren) // e.g. `let (a | )`. + | token::CloseDelim(token::Brace) // e.g. `let A { f: a | }`. ) }); match (is_end_ahead, &self.token.kind) { @@ -227,12 +263,7 @@ impl<'a> Parser<'a> { /// Some special error handling for the "top-level" patterns in a match arm, /// `for` loop, `let`, &c. (in contrast to subpatterns within such). - fn maybe_recover_unexpected_comma( - &mut self, - lo: Span, - rc: RecoverComma, - gate_or: GateOr, - ) -> PResult<'a, ()> { + fn maybe_recover_unexpected_comma(&mut self, lo: Span, rc: RecoverComma) -> PResult<'a, ()> { if rc == RecoverComma::No || self.token != token::Comma { return Ok(()); } @@ -253,22 +284,18 @@ impl<'a> Parser<'a> { if let Ok(seq_snippet) = self.span_to_snippet(seq_span) { const MSG: &str = "try adding parentheses to match on a tuple..."; - let or_suggestion = - gate_or == GateOr::No || !self.sess.gated_spans.is_ungated(sym::or_patterns); err.span_suggestion( seq_span, - if or_suggestion { MSG } else { MSG.trim_end_matches('.') }, + MSG, format!("({})", seq_snippet), Applicability::MachineApplicable, ); - if or_suggestion { - err.span_suggestion( - seq_span, - "...or a vertical bar to match on multiple alternatives", - seq_snippet.replace(",", " |"), - Applicability::MachineApplicable, - ); - } + err.span_suggestion( + seq_span, + "...or a vertical bar to match on multiple alternatives", + seq_snippet.replace(",", " |"), + Applicability::MachineApplicable, + ); } Err(err) } @@ -323,7 +350,7 @@ impl<'a> Parser<'a> { } else if self.check(&token::OpenDelim(token::Bracket)) { // Parse `[pat, pat,...]` as a slice pattern. let (pats, _) = self.parse_delim_comma_seq(token::Bracket, |p| { - p.parse_pat_allow_top_alt(None, GateOr::Yes, RecoverComma::No) + p.parse_pat_allow_top_alt(None, RecoverComma::No) })?; PatKind::Slice(pats) } else if self.check(&token::DotDot) && !self.is_pat_range_end_start(1) { @@ -536,9 +563,8 @@ impl<'a> Parser<'a> { /// Parse a tuple or parenthesis pattern. fn parse_pat_tuple_or_parens(&mut self) -> PResult<'a, PatKind> { - let (fields, trailing_comma) = self.parse_paren_comma_seq(|p| { - p.parse_pat_allow_top_alt(None, GateOr::Yes, RecoverComma::No) - })?; + let (fields, trailing_comma) = + self.parse_paren_comma_seq(|p| p.parse_pat_allow_top_alt(None, RecoverComma::No))?; // Here, `(pat,)` is a tuple pattern. // For backward compatibility, `(..)` is a tuple pattern as well. @@ -851,9 +877,8 @@ impl<'a> Parser<'a> { if qself.is_some() { return self.error_qpath_before_pat(&path, "("); } - let (fields, _) = self.parse_paren_comma_seq(|p| { - p.parse_pat_allow_top_alt(None, GateOr::Yes, RecoverComma::No) - })?; + let (fields, _) = + self.parse_paren_comma_seq(|p| p.parse_pat_allow_top_alt(None, RecoverComma::No))?; Ok(PatKind::TupleStruct(path, fields)) } @@ -868,7 +893,7 @@ impl<'a> Parser<'a> { } /// Parses the fields of a struct-like pattern. - fn parse_pat_fields(&mut self) -> PResult<'a, (Vec<FieldPat>, bool)> { + fn parse_pat_fields(&mut self) -> PResult<'a, (Vec<PatField>, bool)> { let mut fields = Vec::new(); let mut etc = false; let mut ate_comma = true; @@ -1012,14 +1037,14 @@ impl<'a> Parser<'a> { .emit(); } - fn parse_pat_field(&mut self, lo: Span, attrs: Vec<Attribute>) -> PResult<'a, FieldPat> { + fn parse_pat_field(&mut self, lo: Span, attrs: Vec<Attribute>) -> PResult<'a, PatField> { // Check if a colon exists one ahead. This means we're parsing a fieldname. let hi; let (subpat, fieldname, is_shorthand) = if self.look_ahead(1, |t| t == &token::Colon) { // Parsing a pattern of the form `fieldname: pat`. let fieldname = self.parse_field_name()?; self.bump(); - let pat = self.parse_pat_allow_top_alt(None, GateOr::Yes, RecoverComma::No)?; + let pat = self.parse_pat_allow_top_alt(None, RecoverComma::No)?; hi = pat.span; (pat, fieldname, false) } else { @@ -1044,7 +1069,7 @@ impl<'a> Parser<'a> { (subpat, fieldname, true) }; - Ok(FieldPat { + Ok(PatField { ident: fieldname, pat: subpat, is_shorthand, diff --git a/compiler/rustc_parse/src/parser/stmt.rs b/compiler/rustc_parse/src/parser/stmt.rs index 07746f2390d..592f64f4a39 100644 --- a/compiler/rustc_parse/src/parser/stmt.rs +++ b/compiler/rustc_parse/src/parser/stmt.rs @@ -1,7 +1,7 @@ use super::attr::DEFAULT_INNER_ATTR_FORBIDDEN; use super::diagnostics::{AttemptLocalParseRecovery, Error}; use super::expr::LhsExpr; -use super::pat::{GateOr, RecoverComma}; +use super::pat::RecoverComma; use super::path::PathStyle; use super::TrailingToken; use super::{AttrWrapper, BlockMode, ForceCollect, Parser, Restrictions, SemiColonMode}; @@ -13,7 +13,8 @@ use rustc_ast::token::{self, TokenKind}; use rustc_ast::util::classify; use rustc_ast::AstLike; use rustc_ast::{AttrStyle, AttrVec, Attribute, MacCall, MacCallStmt, MacStmtStyle}; -use rustc_ast::{Block, BlockCheckMode, Expr, ExprKind, Local, Stmt, StmtKind, DUMMY_NODE_ID}; +use rustc_ast::{Block, BlockCheckMode, Expr, ExprKind, Local, Stmt}; +use rustc_ast::{StmtKind, DUMMY_NODE_ID}; use rustc_errors::{Applicability, PResult}; use rustc_span::source_map::{BytePos, Span}; use rustc_span::symbol::{kw, sym}; @@ -34,7 +35,7 @@ impl<'a> Parser<'a> { /// If `force_capture` is true, forces collection of tokens regardless of whether /// or not we have attributes - fn parse_stmt_without_recovery( + crate fn parse_stmt_without_recovery( &mut self, capture_semi: bool, force_collect: ForceCollect, @@ -47,39 +48,26 @@ impl<'a> Parser<'a> { if let token::Interpolated(nt) = &self.token.kind { if let token::NtStmt(stmt) = &**nt { let mut stmt = stmt.clone(); - return self.collect_tokens_trailing_token( - attrs, - force_collect, - |this, mut attrs| { - stmt.visit_attrs(|stmt_attrs| { - mem::swap(stmt_attrs, &mut attrs); - stmt_attrs.extend(attrs); - }); - // Make sure we capture the token::Interpolated - this.bump(); - Ok((Some(stmt), TrailingToken::None)) - }, - ); + self.bump(); + stmt.visit_attrs(|stmt_attrs| { + attrs.prepend_to_nt_inner(stmt_attrs); + }); + return Ok(Some(stmt)); } } Ok(Some(if self.token.is_keyword(kw::Let) { self.parse_local_mk(lo, attrs, capture_semi, force_collect)? } else if self.is_kw_followed_by_ident(kw::Mut) { - self.recover_stmt_local( - lo, - attrs.take_for_recovery().into(), - "missing keyword", - "let mut", - )? + self.recover_stmt_local(lo, attrs, "missing keyword", "let mut")? } else if self.is_kw_followed_by_ident(kw::Auto) { self.bump(); // `auto` let msg = "write `let` instead of `auto` to introduce a new variable"; - self.recover_stmt_local(lo, attrs.take_for_recovery().into(), msg, "let")? + self.recover_stmt_local(lo, attrs, msg, "let")? } else if self.is_kw_followed_by_ident(sym::var) { self.bump(); // `var` let msg = "write `let` instead of `var` to introduce a new variable"; - self.recover_stmt_local(lo, attrs.take_for_recovery().into(), msg, "let")? + self.recover_stmt_local(lo, attrs, msg, "let")? } else if self.check_path() && !self.token.is_qpath_start() && !self.is_path_start_item() { // We have avoided contextual keywords like `union`, items with `crate` visibility, // or `auto trait` items. We aim to parse an arbitrary path `a::b` but not something @@ -111,7 +99,7 @@ impl<'a> Parser<'a> { attrs: AttrWrapper, force_collect: ForceCollect, ) -> PResult<'a, Stmt> { - self.collect_tokens_trailing_token(attrs, force_collect, |this, attrs| { + let stmt = self.collect_tokens_trailing_token(attrs, force_collect, |this, attrs| { let path = this.parse_path(PathStyle::Expr)?; if this.eat(&token::Not) { @@ -131,14 +119,22 @@ impl<'a> Parser<'a> { }; let expr = this.with_res(Restrictions::STMT_EXPR, |this| { - let expr = this.parse_dot_or_call_expr_with(expr, lo, attrs)?; + this.parse_dot_or_call_expr_with(expr, lo, attrs) + })?; + // `DUMMY_SP` will get overwritten later in this function + Ok((this.mk_stmt(rustc_span::DUMMY_SP, StmtKind::Expr(expr)), TrailingToken::None)) + })?; + + if let StmtKind::Expr(expr) = stmt.kind { + // Perform this outside of the `collect_tokens_trailing_token` closure, + // since our outer attributes do not apply to this part of the expression + let expr = self.with_res(Restrictions::STMT_EXPR, |this| { this.parse_assoc_expr_with(0, LhsExpr::AlreadyParsed(expr)) })?; - Ok(( - this.mk_stmt(lo.to(this.prev_token.span), StmtKind::Expr(expr)), - TrailingToken::None, - )) - }) + Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Expr(expr))) + } else { + Ok(stmt) + } } /// Parses a statement macro `mac!(args)` provided a `path` representing `mac`. @@ -182,7 +178,7 @@ impl<'a> Parser<'a> { fn recover_stmt_local( &mut self, lo: Span, - attrs: AttrVec, + attrs: AttrWrapper, msg: &str, sugg: &str, ) -> PResult<'a, Stmt> { @@ -212,17 +208,23 @@ impl<'a> Parser<'a> { }) } - fn recover_local_after_let(&mut self, lo: Span, attrs: AttrVec) -> PResult<'a, Stmt> { - let local = self.parse_local(attrs)?; - Ok(self.mk_stmt(lo.to(self.prev_token.span), StmtKind::Local(local))) + fn recover_local_after_let(&mut self, lo: Span, attrs: AttrWrapper) -> PResult<'a, Stmt> { + self.collect_tokens_trailing_token(attrs, ForceCollect::No, |this, attrs| { + let local = this.parse_local(attrs.into())?; + // FIXME - maybe capture semicolon in recovery? + Ok(( + this.mk_stmt(lo.to(this.prev_token.span), StmtKind::Local(local)), + TrailingToken::None, + )) + }) } /// Parses a local variable declaration. fn parse_local(&mut self, attrs: AttrVec) -> PResult<'a, P<Local>> { let lo = self.prev_token.span; - let pat = self.parse_pat_allow_top_alt(None, GateOr::Yes, RecoverComma::Yes)?; + let (pat, colon) = self.parse_pat_before_ty(None, RecoverComma::Yes, "`let` bindings")?; - let (err, ty) = if self.eat(&token::Colon) { + let (err, ty) = if colon { // Save the state of the parser before parsing type normally, in case there is a `:` // instead of an `=` typo. let parser_snapshot_before_type = self.clone(); @@ -289,7 +291,7 @@ impl<'a> Parser<'a> { Ok(P(ast::Local { ty, pat, init, id: DUMMY_NODE_ID, span: lo.to(hi), attrs, tokens: None })) } - /// Parses the RHS of a local variable declaration (e.g., '= 14;'). + /// Parses the RHS of a local variable declaration (e.g., `= 14;`). fn parse_initializer(&mut self, eq_optional: bool) -> PResult<'a, Option<P<Expr>>> { let eq_consumed = match self.token.kind { token::BinOpEq(..) => { @@ -304,6 +306,7 @@ impl<'a> Parser<'a> { "=".to_string(), Applicability::MaybeIncorrect, ) + .help("if you meant to overwrite, remove the `let` binding") .emit(); self.bump(); true diff --git a/compiler/rustc_parse/src/parser/ty.rs b/compiler/rustc_parse/src/parser/ty.rs index 8f03bfd4c3a..0f7b8ebd376 100644 --- a/compiler/rustc_parse/src/parser/ty.rs +++ b/compiler/rustc_parse/src/parser/ty.rs @@ -209,7 +209,7 @@ impl<'a> Parser<'a> { } else if self.eat_keyword(kw::Underscore) { // A type to be inferred `_` TyKind::Infer - } else if self.check_fn_front_matter() { + } else if self.check_fn_front_matter(false) { // Function pointer type self.parse_ty_bare_fn(lo, Vec::new(), recover_return_sign)? } else if self.check_keyword(kw::For) { @@ -217,7 +217,7 @@ impl<'a> Parser<'a> { // `for<'lt> [unsafe] [extern "ABI"] fn (&'lt S) -> T` // `for<'lt> Trait1<'lt> + Trait2 + 'a` let lifetime_defs = self.parse_late_bound_lifetime_defs()?; - if self.check_fn_front_matter() { + if self.check_fn_front_matter(false) { self.parse_ty_bare_fn(lo, lifetime_defs, recover_return_sign)? } else { let path = self.parse_path(PathStyle::Type)?; |
