From 593fdd3d45d7565e34dc429788fa81ca2e25a2d4 Mon Sep 17 00:00:00 2001
From: Aaron Hill
Date: Sat, 26 Sep 2020 21:56:29 -0400
Subject: Rewrite `collect_tokens` implementations to use a flattened buffer

Instead of trying to collect tokens at each depth, we 'flatten' the
stream as we go along, pushing open/close delimiters to our buffer
just like regular tokens. Once capturing is complete, we reconstruct a
nested `TokenTree::Delimited` structure, producing a normal
`TokenStream`.

The reconstructed `TokenStream` is not created immediately - instead,
it is produced on-demand by a closure (wrapped in a new
`LazyTokenStream` type). This closure stores a clone of the original
`TokenCursor`, plus a record of the number of calls to
`next()/next_desugared()`. This is sufficient to reconstruct the
tokenstream seen by the callback without storing any additional state.
If the tokenstream is never used (e.g. when a captured `macro_rules!`
argument is never passed to a proc macro), we never actually create a
`TokenStream`.

This implementation has a number of advantages over the previous one:

* It is significantly simpler, with no edge cases around capturing the
  start/end of a delimited group.

* It can be easily extended to allow replacing tokens at an arbitrary
  'depth' by just using `Vec::splice` at the proper position. This is
  important for PR #76130, which requires us to track information
  about attributes along with tokens.

* The lazy approach to `TokenStream` construction allows us to easily
  parse an AST struct, and then decide after the fact whether we need
  a `TokenStream`. This will be useful when we start collecting tokens
  for `Attribute` - we can discard the `LazyTokenStream` if the parsed
  attribute doesn't need tokens (e.g. is a builtin attribute).

The performance impact seems to be negligible (see
https://github.com/rust-lang/rust/pull/77250#issuecomment-703960604).
There is a small slowdown on a few benchmarks, but it only rises above
1% for incremental builds, where it represents a larger fraction of
the much smaller instruction count. There is a ~1% speedup on a few
other incremental benchmarks - my guess is that the speedups and
slowdowns will usually cancel out in practice.
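To illustrate the approach, here is a minimal, self-contained sketch
of the "flatten, then reconstruct" idea described above. The names
(`FlatToken`, `Delim`, `TokenTree`, `make_token_stream`) are
illustrative stand-ins, not necessarily the exact items this PR adds
to `rustc_ast`:

```rust
// Toy model of the flat capture buffer: delimiters are pushed just
// like regular tokens instead of recursing at each depth.
#[derive(Clone, Debug, PartialEq)]
enum Delim {
    Paren,
    Brace,
}

#[derive(Debug)]
enum FlatToken {
    Token(String),
    OpenDelim(Delim),
    CloseDelim(Delim),
}

// The nested structure we reconstruct once capturing is complete.
#[derive(Debug)]
enum TokenTree {
    Token(String),
    Delimited(Delim, Vec<TokenTree>),
}

// Rebuild `TokenTree::Delimited` nodes from the flat buffer using a
// stack of partially built groups; `None` marks the outermost level.
fn make_token_stream(flat: Vec<FlatToken>) -> Vec<TokenTree> {
    let mut stack: Vec<(Option<Delim>, Vec<TokenTree>)> = vec![(None, Vec::new())];
    for flat_token in flat {
        match flat_token {
            FlatToken::Token(t) => stack.last_mut().unwrap().1.push(TokenTree::Token(t)),
            FlatToken::OpenDelim(d) => stack.push((Some(d), Vec::new())),
            FlatToken::CloseDelim(d) => {
                let (open, trees) = stack.pop().expect("unbalanced close delimiter");
                assert_eq!(open, Some(d.clone()), "mismatched delimiters");
                stack.last_mut().unwrap().1.push(TokenTree::Delimited(d, trees));
            }
        }
    }
    let (_, top) = stack.pop().expect("missing outermost frame");
    assert!(stack.is_empty(), "unbalanced open delimiter");
    top
}

fn main() {
    // `{ foo ( bar ) }` captured as a flat sequence of tokens...
    let flat = vec![
        FlatToken::OpenDelim(Delim::Brace),
        FlatToken::Token("foo".into()),
        FlatToken::OpenDelim(Delim::Paren),
        FlatToken::Token("bar".into()),
        FlatToken::CloseDelim(Delim::Paren),
        FlatToken::CloseDelim(Delim::Brace),
    ];
    // ...comes back out as a single nested `Delimited` tree.
    println!("{:#?}", make_token_stream(flat));
}
```

The lazy construction can be sketched the same way: all that needs to
be stored is a clone of the cursor plus a call count, and nothing is
materialized unless someone asks. Again, `Cursor` and `LazyTokens`
below are toy stand-ins for the real `TokenCursor` and
`LazyTokenStream`:

```rust
#[derive(Clone)]
struct Cursor {
    tokens: Vec<String>,
    index: usize,
}

impl Cursor {
    // Analogous to `TokenCursor::next()`.
    fn next(&mut self) -> String {
        let tok = self.tokens[self.index].clone();
        self.index += 1;
        tok
    }
}

struct LazyTokens {
    // A clone of the cursor taken *before* the callback ran...
    start: Cursor,
    // ...and the number of `next()` calls the callback made.
    num_calls: usize,
}

impl LazyTokens {
    // Reconstruct the tokens seen by the callback, on demand. If this
    // is never called, no token buffer is ever built.
    fn create(&self) -> Vec<String> {
        let mut cursor = self.start.clone();
        (0..self.num_calls).map(|_| cursor.next()).collect()
    }
}

fn main() {
    let mut cursor =
        Cursor { tokens: vec!["a".into(), "b".into(), "c".into()], index: 0 };
    let lazy = LazyTokens { start: cursor.clone(), num_calls: 2 };
    // Parsing advances the real cursor by two tokens...
    cursor.next();
    cursor.next();
    // ...and the lazy stream can replay exactly those two later.
    assert_eq!(lazy.create(), vec!["a", "b"]);
}
```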
---
 compiler/rustc_parse/src/parser/expr.rs | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'compiler/rustc_parse/src/parser/expr.rs')

diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index fb05f8791a5..698a7e7d9cd 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -6,6 +6,7 @@
 use crate::maybe_recover_from_interpolated_ty_qpath;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, Token, TokenKind};
+use rustc_ast::tokenstream::Spacing;
 use rustc_ast::util::classify;
 use rustc_ast::util::literal::LitError;
 use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity};
@@ -18,7 +19,6 @@ use rustc_span::source_map::{self, Span, Spanned};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use rustc_span::{BytePos, Pos};
 use std::mem;
-use tracing::debug;
 
 /// Possibly accepts an `token::Interpolated` expression (a pre-parsed expression
 /// dropped into the token stream, which happens while parsing the result of
@@ -459,7 +459,7 @@ impl<'a> Parser<'a> {
     /// Parses a prefix-unary-operator expr.
     fn parse_prefix_expr(&mut self, attrs: Option<AttrVec>) -> PResult<'a, P<Expr>> {
         let attrs = self.parse_or_use_outer_attributes(attrs)?;
-        self.maybe_collect_tokens(!attrs.is_empty(), |this| {
+        self.maybe_collect_tokens(super::attr::maybe_needs_tokens(&attrs), |this| {
             let lo = this.token.span;
             // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr()
             let (hi, ex) = match this.token.uninterpolate().kind {
@@ -884,7 +884,7 @@ impl<'a> Parser<'a> {
                 assert!(suffix.is_none());
                 let symbol = Symbol::intern(&i);
                 self.token = Token::new(token::Ident(symbol, false), ident_span);
-                let next_token = Token::new(token::Dot, dot_span);
+                let next_token = (Token::new(token::Dot, dot_span), self.token_spacing);
                 self.parse_tuple_field_access_expr(lo, base, symbol, None, Some(next_token))
             }
             // 1.2 | 1.2e3
@@ -902,12 +902,14 @@ impl<'a> Parser<'a> {
                 };
                 let symbol1 = Symbol::intern(&i1);
                 self.token = Token::new(token::Ident(symbol1, false), ident1_span);
-                let next_token1 = Token::new(token::Dot, dot_span);
+                // This needs to be `Spacing::Alone` to prevent regressions.
+                // See issue #76399 and PR #76285 for more details
+                let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone);
                 let base1 =
                     self.parse_tuple_field_access_expr(lo, base, symbol1, None, Some(next_token1));
                 let symbol2 = Symbol::intern(&i2);
                 let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span);
-                self.bump_with(next_token2); // `.`
+                self.bump_with((next_token2, self.token_spacing)); // `.`
                 self.parse_tuple_field_access_expr(lo, base1, symbol2, suffix, None)
             }
             // 1e+ | 1e- (recovered)
@@ -930,7 +932,7 @@ impl<'a> Parser<'a> {
         base: P<Expr>,
         field: Symbol,
         suffix: Option<Symbol>,
-        next_token: Option<Token>,
+        next_token: Option<(Token, Spacing)>,
     ) -> P<Expr> {
         match next_token {
             Some(next_token) => self.bump_with(next_token),
@@ -1109,12 +1111,11 @@ impl<'a> Parser<'a> {
 
     fn maybe_collect_tokens(
         &mut self,
-        has_outer_attrs: bool,
+        needs_tokens: bool,
         f: impl FnOnce(&mut Self) -> PResult<'a, P<Expr>>,
     ) -> PResult<'a, P<Expr>> {
-        if has_outer_attrs {
+        if needs_tokens {
             let (mut expr, tokens) = self.collect_tokens(f)?;
-            debug!("maybe_collect_tokens: Collected tokens for {:?} (tokens {:?}", expr, tokens);
             expr.tokens = Some(tokens);
             Ok(expr)
         } else {
-- 
cgit 1.4.1-3-g733a5
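A note on the `Spacing::Alone` comment in the hunk above: `Spacing`
records whether a token is immediately followed by another token
(`Joint`) and may therefore be glued to it when the stream is
re-emitted, or must stand alone (`Alone`). The sketch below is a
deliberately simplified stand-in for that gluing behavior (the
`Token`, `Spacing`, and `glue` definitions here are toys, not rustc's
actual implementation). It shows how a recovered `.` marked `Joint`
could fuse with a following `.` into a `..` token, which is the kind
of regression the in-code comment points at via issue #76399:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Spacing {
    // The token is immediately followed by another token and may be
    // glued with it (e.g. `.` + `.` -> `..`).
    Joint,
    // The token stands alone and must never be glued.
    Alone,
}

#[derive(Clone, Debug, PartialEq)]
struct Token {
    text: String,
    spacing: Spacing,
}

// Simplified gluing pass: a `Joint` token merges with the single
// token that follows it (rustc's real rules are richer than this).
fn glue(tokens: &[Token]) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut pending: Option<String> = None;
    for tok in tokens {
        match pending.take() {
            Some(prev) => out.push(prev + &tok.text),
            None if tok.spacing == Spacing::Joint => pending = Some(tok.text.clone()),
            None => out.push(tok.text.clone()),
        }
    }
    if let Some(prev) = pending {
        out.push(prev);
    }
    out
}

fn main() {
    let dot = |spacing| Token { text: ".".into(), spacing };
    // With `Joint`, two adjacent dots would glue into a `..` token...
    assert_eq!(glue(&[dot(Spacing::Joint), dot(Spacing::Alone)]), vec![".."]);
    // ...while `Alone` keeps them as two separate `.` tokens.
    assert_eq!(glue(&[dot(Spacing::Alone), dot(Spacing::Alone)]), vec![".", "."]);
}
```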