diff options
Diffstat (limited to 'compiler/rustc_parse/src/parser/mod.rs')
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 147 |
1 files changed, 107 insertions, 40 deletions
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 2fa25e40c6b..4b97c8b0a81 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -19,13 +19,16 @@ pub use path::PathStyle; use rustc_ast::ptr::P; use rustc_ast::token::{self, DelimToken, Token, TokenKind}; +use rustc_ast::tokenstream::AttributesData; use rustc_ast::tokenstream::{self, DelimSpan, Spacing}; -use rustc_ast::tokenstream::{TokenStream, TokenTree, TreeAndSpacing}; +use rustc_ast::tokenstream::{TokenStream, TokenTree}; +use rustc_ast::AttrId; use rustc_ast::DUMMY_NODE_ID; use rustc_ast::{self as ast, AnonConst, AstLike, AttrStyle, AttrVec, Const, CrateSugar, Extern}; use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit, Unsafe}; use rustc_ast::{Visibility, VisibilityKind}; use rustc_ast_pretty::pprust; +use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::Lrc; use rustc_errors::PResult; use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError}; @@ -34,6 +37,7 @@ use rustc_span::source_map::{Span, DUMMY_SP}; use rustc_span::symbol::{kw, sym, Ident, Symbol}; use tracing::debug; +use std::ops::Range; use std::{cmp, mem, slice}; bitflags::bitflags! { @@ -64,6 +68,7 @@ pub enum ForceCollect { No, } +#[derive(Debug, Eq, PartialEq)] pub enum TrailingToken { None, Semi, @@ -111,6 +116,7 @@ pub struct Parser<'a> { pub token_spacing: Spacing, /// The previous token. pub prev_token: Token, + pub capture_cfg: bool, restrictions: Restrictions, expected_tokens: Vec<TokenType>, // Important: This must only be advanced from `next_tok` @@ -134,6 +140,44 @@ pub struct Parser<'a> { pub last_type_ascription: Option<(Span, bool /* likely path typo */)>, /// If present, this `Parser` is not parsing Rust code but rather a macro call. subparser_name: Option<&'static str>, + capture_state: CaptureState, +} + +/// Indicates a range of tokens that should be replaced by +/// the tokens in the provided vector. This is used in two +/// places during token collection: +/// +/// 1. During the parsing of an AST node that may have a `#[derive]` +/// attribute, we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]` +/// In this case, we use a `ReplaceRange` to replace the entire inner AST node +/// with `FlatToken::AttrTarget`, allowing us to perform eager cfg-expansion +/// on a `AttrAnnotatedTokenStream` +/// +/// 2. When we parse an inner attribute while collecting tokens. We +/// remove inner attributes from the token stream entirely, and +/// instead track them through the `attrs` field on the AST node. +/// This allows us to easily manipulate them (for example, removing +/// the first macro inner attribute to invoke a proc-macro). +/// When create a `TokenStream`, the inner attributes get inserted +/// into the proper place in the token stream. +pub type ReplaceRange = (Range<u32>, Vec<(FlatToken, Spacing)>); + +/// Controls how we capture tokens. Capturing can be expensive, +/// so we try to avoid performing capturing in cases where +/// we will never need a `AttrAnnotatedTokenStream` +#[derive(Copy, Clone)] +pub enum Capturing { + /// We aren't performing any capturing - this is the default mode. + No, + /// We are capturing tokens + Yes, +} + +#[derive(Clone)] +struct CaptureState { + capturing: Capturing, + replace_ranges: Vec<ReplaceRange>, + inner_attr_ranges: FxHashMap<AttrId, ReplaceRange>, } impl<'a> Drop for Parser<'a> { @@ -167,18 +211,11 @@ struct TokenCursor { // want to capture just the first 'unglued' token. // For example, capturing the `Vec<u8>` // in `Option<Vec<u8>>` requires us to unglue - // the trailing `>>` token. The `append_unglued_token` + // the trailing `>>` token. The `break_last_token` // field is used to track this token - it gets // appended to the captured stream when // we evaluate a `LazyTokenStream` - append_unglued_token: Option<TreeAndSpacing>, - // If `true`, skip the delimiters for `None`-delimited groups, - // and just yield the inner tokens. This is `true` during - // normal parsing, since the parser code is not currently prepared - // to handle `None` delimiters. When capturing a `TokenStream`, - // however, we want to handle `None`-delimiters, since - // proc-macros always see `None`-delimited groups. - skip_none_delims: bool, + break_last_token: bool, } #[derive(Clone)] @@ -191,13 +228,13 @@ struct TokenCursorFrame { } impl TokenCursorFrame { - fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream, skip_none_delims: bool) -> Self { + fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self { TokenCursorFrame { delim, span, - open_delim: delim == token::NoDelim && skip_none_delims, + open_delim: false, tree_cursor: tts.into_trees(), - close_delim: delim == token::NoDelim && skip_none_delims, + close_delim: false, } } } @@ -225,7 +262,7 @@ impl TokenCursor { return (token, spacing); } TokenTree::Delimited(sp, delim, tts) => { - let frame = TokenCursorFrame::new(sp, delim, tts, self.skip_none_delims); + let frame = TokenCursorFrame::new(sp, delim, tts); self.stack.push(mem::replace(&mut self.frame, frame)); } } @@ -283,7 +320,6 @@ impl TokenCursor { .cloned() .collect::<TokenStream>() }, - self.skip_none_delims, ), )); @@ -372,26 +408,24 @@ impl<'a> Parser<'a> { desugar_doc_comments: bool, subparser_name: Option<&'static str>, ) -> Self { + let mut start_frame = TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens); + start_frame.open_delim = true; + start_frame.close_delim = true; + let mut parser = Parser { sess, token: Token::dummy(), token_spacing: Spacing::Alone, prev_token: Token::dummy(), + capture_cfg: false, restrictions: Restrictions::empty(), expected_tokens: Vec::new(), - // Skip over the delimiters for `None`-delimited groups token_cursor: TokenCursor { - frame: TokenCursorFrame::new( - DelimSpan::dummy(), - token::NoDelim, - tokens, - /* skip_none_delims */ true, - ), + frame: start_frame, stack: Vec::new(), num_next_calls: 0, desugar_doc_comments, - append_unglued_token: None, - skip_none_delims: true, + break_last_token: false, }, desugar_doc_comments, unmatched_angle_bracket_count: 0, @@ -400,6 +434,11 @@ impl<'a> Parser<'a> { last_unexpected_token_span: None, last_type_ascription: None, subparser_name, + capture_state: CaptureState { + capturing: Capturing::No, + replace_ranges: Vec::new(), + inner_attr_ranges: Default::default(), + }, }; // Make parser point to the first token. @@ -409,21 +448,29 @@ impl<'a> Parser<'a> { } fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) { - let (mut next, spacing) = if self.desugar_doc_comments { - self.token_cursor.next_desugared() - } else { - self.token_cursor.next() - }; - self.token_cursor.num_next_calls += 1; - // We've retrieved an token from the underlying - // cursor, so we no longer need to worry about - // an unglued token. See `break_and_eat` for more details - self.token_cursor.append_unglued_token = None; - if next.span.is_dummy() { - // Tweak the location for better diagnostics, but keep syntactic context intact. - next.span = fallback_span.with_ctxt(next.span.ctxt()); + loop { + let (mut next, spacing) = if self.desugar_doc_comments { + self.token_cursor.next_desugared() + } else { + self.token_cursor.next() + }; + self.token_cursor.num_next_calls += 1; + // We've retrieved an token from the underlying + // cursor, so we no longer need to worry about + // an unglued token. See `break_and_eat` for more details + self.token_cursor.break_last_token = false; + if next.span.is_dummy() { + // Tweak the location for better diagnostics, but keep syntactic context intact. + next.span = fallback_span.with_ctxt(next.span.ctxt()); + } + if matches!( + next.kind, + token::OpenDelim(token::NoDelim) | token::CloseDelim(token::NoDelim) + ) { + continue; + } + return (next, spacing); } - (next, spacing) } pub fn unexpected<T>(&mut self) -> PResult<'a, T> { @@ -621,8 +668,7 @@ impl<'a> Parser<'a> { // If we consume any additional tokens, then this token // is not needed (we'll capture the entire 'glued' token), // and `next_tok` will set this field to `None` - self.token_cursor.append_unglued_token = - Some((TokenTree::Token(self.token.clone()), Spacing::Alone)); + self.token_cursor.break_last_token = true; // Use the spacing of the glued token as the spacing // of the unglued second token. self.bump_with((Token::new(second, second_span), self.token_spacing)); @@ -1304,3 +1350,24 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa } } } + +/// A helper struct used when building a `AttrAnnotatedTokenStream` from +/// a `LazyTokenStream`. Both delimiter and non-delimited tokens +/// are stored as `FlatToken::Token`. A vector of `FlatToken`s +/// is then 'parsed' to build up a `AttrAnnotatedTokenStream` with nested +/// `AttrAnnotatedTokenTree::Delimited` tokens +#[derive(Debug, Clone)] +pub enum FlatToken { + /// A token - this holds both delimiter (e.g. '{' and '}') + /// and non-delimiter tokens + Token(Token), + /// Holds the `AttributesData` for an AST node. The + /// `AttributesData` is inserted directly into the + /// constructed `AttrAnnotatedTokenStream` as + /// a `AttrAnnotatedTokenTree::Attributes` + AttrTarget(AttributesData), + /// A special 'empty' token that is ignored during the conversion + /// to a `AttrAnnotatedTokenStream`. This is used to simplify the + /// handling of replace ranges. + Empty, +} |
