From 593fdd3d45d7565e34dc429788fa81ca2e25a2d4 Mon Sep 17 00:00:00 2001
From: Aaron Hill
Date: Sat, 26 Sep 2020 21:56:29 -0400
Subject: Rewrite `collect_tokens` implementations to use a flattened buffer

Instead of trying to collect tokens at each depth, we 'flatten' the
stream as we go along, pushing open/close delimiters to our buffer
just like regular tokens. Once capturing is complete, we reconstruct a
nested `TokenTree::Delimited` structure, producing a normal
`TokenStream`.

The reconstructed `TokenStream` is not created immediately - instead,
it is produced on-demand by a closure (wrapped in a new
`LazyTokenStream` type). This closure stores a clone of the original
`TokenCursor`, plus a record of the number of calls to
`next()/next_desugared()`. This is sufficient to reconstruct the
tokenstream seen by the callback without storing any additional state.
If the tokenstream is never used (e.g. when a captured `macro_rules!`
argument is never passed to a proc macro), we never actually create a
`TokenStream`.

This implementation has a number of advantages over the previous one:

* It is significantly simpler, with no edge cases around capturing the
  start/end of a delimited group.

* It can be easily extended to allow replacing tokens at an arbitrary
  'depth' by just using `Vec::splice` at the proper position. This is
  important for PR #76130, which requires us to track information
  about attributes along with tokens.

* The lazy approach to `TokenStream` construction allows us to easily
  parse an AST struct, and then decide after the fact whether we need
  a `TokenStream`. This will be useful when we start collecting tokens
  for `Attribute` - we can discard the `LazyTokenStream` if the parsed
  attribute doesn't need tokens (e.g. is a builtin attribute).

The performance impact seems to be negligible (see
https://github.com/rust-lang/rust/pull/77250#issuecomment-703960604).
There is a small slowdown on a few benchmarks, but it only rises above
1% for incremental builds, where it represents a larger fraction of
the much smaller instruction count. There is a ~1% speedup on a few
other incremental benchmarks - my guess is that the speedups and
slowdowns will usually cancel out in practice.
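To illustrate the approach, here is a minimal, self-contained sketch
of the "flatten, then reconstruct" idea described above. The names
(`FlatToken`, `Delim`, `TokenTree`, `make_token_stream`) are
illustrative stand-ins, not necessarily the exact items this PR adds
to `rustc_ast`:

```rust
// Toy model of the flat capture buffer: delimiters are pushed just
// like regular tokens instead of recursing at each depth.
#[derive(Clone, Debug, PartialEq)]
enum Delim {
    Paren,
    Brace,
}

#[derive(Debug)]
enum FlatToken {
    Token(String),
    OpenDelim(Delim),
    CloseDelim(Delim),
}

// The nested structure we reconstruct once capturing is complete.
#[derive(Debug)]
enum TokenTree {
    Token(String),
    Delimited(Delim, Vec<TokenTree>),
}

// Rebuild `TokenTree::Delimited` nodes from the flat buffer using a
// stack of partially built groups; `None` marks the outermost level.
fn make_token_stream(flat: Vec<FlatToken>) -> Vec<TokenTree> {
    let mut stack: Vec<(Option<Delim>, Vec<TokenTree>)> = vec![(None, Vec::new())];
    for flat_token in flat {
        match flat_token {
            FlatToken::Token(t) => stack.last_mut().unwrap().1.push(TokenTree::Token(t)),
            FlatToken::OpenDelim(d) => stack.push((Some(d), Vec::new())),
            FlatToken::CloseDelim(d) => {
                let (open, trees) = stack.pop().expect("unbalanced close delimiter");
                assert_eq!(open, Some(d.clone()), "mismatched delimiters");
                stack.last_mut().unwrap().1.push(TokenTree::Delimited(d, trees));
            }
        }
    }
    let (_, top) = stack.pop().expect("missing outermost frame");
    assert!(stack.is_empty(), "unbalanced open delimiter");
    top
}

fn main() {
    // `{ foo ( bar ) }` captured as a flat sequence of tokens...
    let flat = vec![
        FlatToken::OpenDelim(Delim::Brace),
        FlatToken::Token("foo".into()),
        FlatToken::OpenDelim(Delim::Paren),
        FlatToken::Token("bar".into()),
        FlatToken::CloseDelim(Delim::Paren),
        FlatToken::CloseDelim(Delim::Brace),
    ];
    // ...comes back out as a single nested `Delimited` tree.
    println!("{:#?}", make_token_stream(flat));
}
```

The lazy construction can be sketched the same way: all that needs to
be stored is a clone of the cursor plus a call count, and nothing is
materialized unless someone asks. Again, `Cursor` and `LazyTokens`
below are toy stand-ins for the real `TokenCursor` and
`LazyTokenStream`:

```rust
#[derive(Clone)]
struct Cursor {
    tokens: Vec<String>,
    index: usize,
}

impl Cursor {
    // Analogous to `TokenCursor::next()`.
    fn next(&mut self) -> String {
        let tok = self.tokens[self.index].clone();
        self.index += 1;
        tok
    }
}

struct LazyTokens {
    // A clone of the cursor taken *before* the callback ran...
    start: Cursor,
    // ...and the number of `next()` calls the callback made.
    num_calls: usize,
}

impl LazyTokens {
    // Reconstruct the tokens seen by the callback, on demand. If this
    // is never called, no token buffer is ever built.
    fn create(&self) -> Vec<String> {
        let mut cursor = self.start.clone();
        (0..self.num_calls).map(|_| cursor.next()).collect()
    }
}

fn main() {
    let mut cursor =
        Cursor { tokens: vec!["a".into(), "b".into(), "c".into()], index: 0 };
    let lazy = LazyTokens { start: cursor.clone(), num_calls: 2 };
    // Parsing advances the real cursor by two tokens...
    cursor.next();
    cursor.next();
    // ...and the lazy stream can replay exactly those two later.
    assert_eq!(lazy.create(), vec!["a", "b"]);
}
```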
---
 compiler/rustc_parse/src/parser/expr.rs | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'compiler/rustc_parse/src/parser/expr.rs')

diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index fb05f8791a5..698a7e7d9cd 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -6,6 +6,7 @@
 use crate::maybe_recover_from_interpolated_ty_qpath;
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, Token, TokenKind};
+use rustc_ast::tokenstream::Spacing;
 use rustc_ast::util::classify;
 use rustc_ast::util::literal::LitError;
 use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity};
@@ -18,7 +19,6 @@ use rustc_span::source_map::{self, Span, Spanned};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use rustc_span::{BytePos, Pos};
 use std::mem;
-use tracing::debug;
 
 /// Possibly accepts an `token::Interpolated` expression (a pre-parsed expression
 /// dropped into the token stream, which happens while parsing the result of
@@ -459,7 +459,7 @@ impl<'a> Parser<'a> {
     /// Parses a prefix-unary-operator expr.
     fn parse_prefix_expr(&mut self, attrs: Option<AttrVec>) -> PResult<'a, P<Expr>> {
         let attrs = self.parse_or_use_outer_attributes(attrs)?;
-        self.maybe_collect_tokens(!attrs.is_empty(), |this| {
+        self.maybe_collect_tokens(super::attr::maybe_needs_tokens(&attrs), |this| {
             let lo = this.token.span;
             // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr()
             let (hi, ex) = match this.token.uninterpolate().kind {
@@ -884,7 +884,7 @@ impl<'a> Parser<'a> {
                 assert!(suffix.is_none());
                 let symbol = Symbol::intern(&i);
                 self.token = Token::new(token::Ident(symbol, false), ident_span);
-                let next_token = Token::new(token::Dot, dot_span);
+                let next_token = (Token::new(token::Dot, dot_span), self.token_spacing);
                 self.parse_tuple_field_access_expr(lo, base, symbol, None, Some(next_token))
             }
             // 1.2 | 1.2e3
@@ -902,12 +902,14 @@ impl<'a> Parser<'a> {
                 };
                 let symbol1 = Symbol::intern(&i1);
                 self.token = Token::new(token::Ident(symbol1, false), ident1_span);
-                let next_token1 = Token::new(token::Dot, dot_span);
+                // This needs to be `Spacing::Alone` to prevent regressions.
+                // See issue #76399 and PR #76285 for more details
+                let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone);
                 let base1 =
                     self.parse_tuple_field_access_expr(lo, base, symbol1, None, Some(next_token1));
                 let symbol2 = Symbol::intern(&i2);
                 let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span);
-                self.bump_with(next_token2); // `.`
+                self.bump_with((next_token2, self.token_spacing)); // `.`
                 self.parse_tuple_field_access_expr(lo, base1, symbol2, suffix, None)
             }
             // 1e+ | 1e- (recovered)
@@ -930,7 +932,7 @@ impl<'a> Parser<'a> {
         base: P<Expr>,
         field: Symbol,
         suffix: Option<Symbol>,
-        next_token: Option<Token>,
+        next_token: Option<(Token, Spacing)>,
     ) -> P<Expr> {
         match next_token {
             Some(next_token) => self.bump_with(next_token),
@@ -1109,12 +1111,11 @@ impl<'a> Parser<'a> {
 
     fn maybe_collect_tokens(
         &mut self,
-        has_outer_attrs: bool,
+        needs_tokens: bool,
         f: impl FnOnce(&mut Self) -> PResult<'a, P<Expr>>,
     ) -> PResult<'a, P<Expr>> {
-        if has_outer_attrs {
+        if needs_tokens {
             let (mut expr, tokens) = self.collect_tokens(f)?;
-            debug!("maybe_collect_tokens: Collected tokens for {:?} (tokens {:?}", expr, tokens);
             expr.tokens = Some(tokens);
             Ok(expr)
         } else {
-- 
cgit 1.4.1-3-g733a5
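A note on the `Spacing::Alone` comment in the hunk above: `Spacing`
records whether a token is immediately followed by another token
(`Joint`) and may therefore be glued to it when the stream is
re-emitted, or must stand alone (`Alone`). The sketch below is a
deliberately simplified stand-in for that gluing behavior (the
`Token`, `Spacing`, and `glue` definitions here are toys, not rustc's
actual implementation). It shows how a recovered `.` marked `Joint`
could fuse with a following `.` into a `..` token, which is the kind
of regression the in-code comment points at via issue #76399:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Spacing {
    // The token is immediately followed by another token and may be
    // glued with it (e.g. `.` + `.` -> `..`).
    Joint,
    // The token stands alone and must never be glued.
    Alone,
}

#[derive(Clone, Debug, PartialEq)]
struct Token {
    text: String,
    spacing: Spacing,
}

// Simplified gluing pass: a `Joint` token merges with the single
// token that follows it (rustc's real rules are richer than this).
fn glue(tokens: &[Token]) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut pending: Option<String> = None;
    for tok in tokens {
        match pending.take() {
            Some(prev) => out.push(prev + &tok.text),
            None if tok.spacing == Spacing::Joint => pending = Some(tok.text.clone()),
            None => out.push(tok.text.clone()),
        }
    }
    if let Some(prev) = pending {
        out.push(prev);
    }
    out
}

fn main() {
    let dot = |spacing| Token { text: ".".into(), spacing };
    // With `Joint`, two adjacent dots would glue into a `..` token...
    assert_eq!(glue(&[dot(Spacing::Joint), dot(Spacing::Alone)]), vec![".."]);
    // ...while `Alone` keeps them as two separate `.` tokens.
    assert_eq!(glue(&[dot(Spacing::Alone), dot(Spacing::Alone)]), vec![".", "."]);
}
```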