From 28236ab703d21483dc818108f157fbb2da5a2802 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 29 Apr 2025 11:18:08 +1000 Subject: Move various token stream things from `rustc_parse` to `rustc_ast`. Specifically: `TokenCursor`, `TokenTreeCursor`, `LazyAttrTokenStreamImpl`, `FlatToken`, `make_attr_token_stream`, `ParserRange`, `NodeRange`. `ParserReplacement`, and `NodeReplacement`. These are all related to token streams, rather than actual parsing. This will facilitate the simplifications in the next commit. --- compiler/rustc_parse/src/lib.rs | 1 - compiler/rustc_parse/src/parser/attr.rs | 4 +- compiler/rustc_parse/src/parser/attr_wrapper.rs | 169 +---------------------- compiler/rustc_parse/src/parser/mod.rs | 176 +----------------------- 4 files changed, 12 insertions(+), 338 deletions(-) (limited to 'compiler/rustc_parse/src') diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs index e73d68e2037..37204702bcb 100644 --- a/compiler/rustc_parse/src/lib.rs +++ b/compiler/rustc_parse/src/lib.rs @@ -5,7 +5,6 @@ #![allow(rustc::diagnostic_outside_of_impl)] #![allow(rustc::untranslatable_diagnostic)] #![cfg_attr(bootstrap, feature(let_chains))] -#![feature(array_windows)] #![feature(assert_matches)] #![feature(box_patterns)] #![feature(debug_closure_helpers)] diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index 53614049f08..41d3889c448 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -1,5 +1,6 @@ use rustc_ast as ast; use rustc_ast::token::{self, MetaVarKind}; +use rustc_ast::tokenstream::ParserRange; use rustc_ast::{Attribute, attr}; use rustc_errors::codes::*; use rustc_errors::{Diag, PResult}; @@ -8,8 +9,7 @@ use thin_vec::ThinVec; use tracing::debug; use super::{ - AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle, Trailing, - UsePreAttrPos, + AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, PathStyle, Trailing, UsePreAttrPos, }; use crate::{errors, exp, fluent_generated as fluent}; diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 6061c9cb485..912045d8835 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -1,21 +1,18 @@ use std::borrow::Cow; -use std::{iter, mem}; +use std::mem; -use rustc_ast::token::{Delimiter, Token}; +use rustc_ast::token::Token; use rustc_ast::tokenstream::{ - AttrTokenStream, AttrTokenTree, AttrsTarget, DelimSpacing, DelimSpan, LazyAttrTokenStream, - Spacing, ToAttrTokenStream, + AttrsTarget, LazyAttrTokenStream, LazyAttrTokenStreamImpl, NodeRange, ParserRange, Spacing, + TokenCursor, }; use rustc_ast::{self as ast, AttrVec, Attribute, HasAttrs, HasTokens}; use rustc_data_structures::fx::FxHashSet; use rustc_errors::PResult; use rustc_session::parse::ParseSess; -use rustc_span::{DUMMY_SP, Span, sym}; +use rustc_span::{DUMMY_SP, sym}; -use super::{ - Capturing, FlatToken, ForceCollect, NodeRange, NodeReplacement, Parser, ParserRange, - TokenCursor, Trailing, -}; +use super::{Capturing, ForceCollect, Parser, Trailing}; // When collecting tokens, this fully captures the start point. Usually its // just after outer attributes, but occasionally it's before. 
@@ -94,95 +91,6 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool { }) } -// From a value of this type we can reconstruct the `TokenStream` seen by the -// `f` callback passed to a call to `Parser::collect_tokens`, by -// replaying the getting of the tokens. This saves us producing a `TokenStream` -// if it is never needed, e.g. a captured `macro_rules!` argument that is never -// passed to a proc macro. In practice, token stream creation happens rarely -// compared to calls to `collect_tokens` (see some statistics in #78736) so we -// are doing as little up-front work as possible. -// -// This also makes `Parser` very cheap to clone, since -// there is no intermediate collection buffer to clone. -struct LazyAttrTokenStreamImpl { - start_token: (Token, Spacing), - cursor_snapshot: TokenCursor, - num_calls: u32, - break_last_token: u32, - node_replacements: Box<[NodeReplacement]>, -} - -impl ToAttrTokenStream for LazyAttrTokenStreamImpl { - fn to_attr_token_stream(&self) -> AttrTokenStream { - // The token produced by the final call to `{,inlined_}next` was not - // actually consumed by the callback. The combination of chaining the - // initial token and using `take` produces the desired result - we - // produce an empty `TokenStream` if no calls were made, and omit the - // final token otherwise. - let mut cursor_snapshot = self.cursor_snapshot.clone(); - let tokens = iter::once(FlatToken::Token(self.start_token)) - .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next()))) - .take(self.num_calls as usize); - - if self.node_replacements.is_empty() { - make_attr_token_stream(tokens, self.break_last_token) - } else { - let mut tokens: Vec<_> = tokens.collect(); - let mut node_replacements = self.node_replacements.to_vec(); - node_replacements.sort_by_key(|(range, _)| range.0.start); - - #[cfg(debug_assertions)] - for [(node_range, tokens), (next_node_range, next_tokens)] in - node_replacements.array_windows() - { - assert!( - node_range.0.end <= next_node_range.0.start - || node_range.0.end >= next_node_range.0.end, - "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", - node_range, - tokens, - next_node_range, - next_tokens, - ); - } - - // Process the replace ranges, starting from the highest start - // position and working our way back. If have tokens like: - // - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // Then we will generate replace ranges for both - // the `#[cfg(FALSE)] field: bool` and the entire - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // By starting processing from the replace range with the greatest - // start position, we ensure that any (outer) replace range which - // encloses another (inner) replace range will fully overwrite the - // inner range's replacement. - for (node_range, target) in node_replacements.into_iter().rev() { - assert!( - !node_range.0.is_empty(), - "Cannot replace an empty node range: {:?}", - node_range.0 - ); - - // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus - // enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the - // total length of `tokens` constant throughout the replacement process, allowing - // us to do all replacements without adjusting indices. 
- let target_len = target.is_some() as usize; - tokens.splice( - (node_range.0.start as usize)..(node_range.0.end as usize), - target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain( - iter::repeat(FlatToken::Empty).take(node_range.0.len() - target_len), - ), - ); - } - make_attr_token_stream(tokens.into_iter(), self.break_last_token) - } - } -} - impl<'a> Parser<'a> { pub(super) fn collect_pos(&self) -> CollectPos { CollectPos { @@ -483,71 +391,6 @@ impl<'a> Parser<'a> { } } -/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an -/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and -/// close delims. -fn make_attr_token_stream( - iter: impl Iterator, - break_last_token: u32, -) -> AttrTokenStream { - #[derive(Debug)] - struct FrameData { - // This is `None` for the first frame, `Some` for all others. - open_delim_sp: Option<(Delimiter, Span, Spacing)>, - inner: Vec, - } - // The stack always has at least one element. Storing it separately makes for shorter code. - let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] }; - let mut stack_rest = vec![]; - for flat_token in iter { - match flat_token { - FlatToken::Token((token @ Token { kind, span }, spacing)) => { - if let Some(delim) = kind.open_delim() { - stack_rest.push(mem::replace( - &mut stack_top, - FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] }, - )); - } else if let Some(delim) = kind.close_delim() { - let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap()); - let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap(); - assert!( - open_delim.eq_ignoring_invisible_origin(&delim), - "Mismatched open/close delims: open={open_delim:?} close={span:?}" - ); - let dspan = DelimSpan::from_pair(open_sp, span); - let dspacing = DelimSpacing::new(open_spacing, spacing); - let stream = AttrTokenStream::new(frame_data.inner); - let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream); - stack_top.inner.push(delimited); - } else { - stack_top.inner.push(AttrTokenTree::Token(token, spacing)) - } - } - FlatToken::AttrsTarget(target) => { - stack_top.inner.push(AttrTokenTree::AttrsTarget(target)) - } - FlatToken::Empty => {} - } - } - - if break_last_token > 0 { - let last_token = stack_top.inner.pop().unwrap(); - if let AttrTokenTree::Token(last_token, spacing) = last_token { - let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap(); - - // Tokens are always ASCII chars, so we can use byte arithmetic here. - let mut first_span = last_token.span.shrink_to_lo(); - first_span = - first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token)); - - stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing)); - } else { - panic!("Unexpected last token {last_token:?}") - } - } - AttrTokenStream::new(stack_top.inner) -} - /// Tokens are needed if: /// - any non-single-segment attributes (other than doc comments) are present, /// e.g. 
`rustfmt::skip`; or diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 48df8b59d55..5500fba58a5 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -12,7 +12,6 @@ pub mod token_type; mod ty; use std::assert_matches::debug_assert_matches; -use std::ops::Range; use std::{fmt, mem, slice}; use attr_wrapper::{AttrWrapper, UsePreAttrPos}; @@ -25,7 +24,9 @@ use rustc_ast::ptr::P; use rustc_ast::token::{ self, IdentIsRaw, InvisibleOrigin, MetaVarKind, NtExprKind, NtPatKind, Token, TokenKind, }; -use rustc_ast::tokenstream::{AttrsTarget, Spacing, TokenStream, TokenTree}; +use rustc_ast::tokenstream::{ + ParserRange, ParserReplacement, Spacing, TokenCursor, TokenStream, TokenTree, TokenTreeCursor, +}; use rustc_ast::util::case::Case; use rustc_ast::{ self as ast, AnonConst, AttrArgs, AttrId, ByRef, Const, CoroutineKind, DUMMY_NODE_ID, @@ -37,7 +38,7 @@ use rustc_data_structures::fx::FxHashMap; use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult}; use rustc_index::interval::IntervalSet; use rustc_session::parse::ParseSess; -use rustc_span::{DUMMY_SP, Ident, Span, Symbol, kw, sym}; +use rustc_span::{Ident, Span, Symbol, kw, sym}; use thin_vec::ThinVec; use token_type::TokenTypeSet; pub use token_type::{ExpKeywordPair, ExpTokenPair, TokenType}; @@ -187,57 +188,6 @@ struct ClosureSpans { body: Span, } -/// A token range within a `Parser`'s full token stream. -#[derive(Clone, Debug)] -struct ParserRange(Range); - -/// A token range within an individual AST node's (lazy) token stream, i.e. -/// relative to that node's first token. Distinct from `ParserRange` so the two -/// kinds of range can't be mixed up. -#[derive(Clone, Debug)] -struct NodeRange(Range); - -/// Indicates a range of tokens that should be replaced by an `AttrsTarget` -/// (replacement) or be replaced by nothing (deletion). This is used in two -/// places during token collection. -/// -/// 1. Replacement. During the parsing of an AST node that may have a -/// `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]` -/// or `#[cfg_attr]`, we replace the entire inner AST node with -/// `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an -/// `AttrTokenStream`. -/// -/// 2. Deletion. We delete inner attributes from all collected token streams, -/// and instead track them through the `attrs` field on the AST node. This -/// lets us manipulate them similarly to outer attributes. When we create a -/// `TokenStream`, the inner attributes are inserted into the proper place -/// in the token stream. -/// -/// Each replacement starts off in `ParserReplacement` form but is converted to -/// `NodeReplacement` form when it is attached to a single AST node, via -/// `LazyAttrTokenStreamImpl`. -type ParserReplacement = (ParserRange, Option); - -/// See the comment on `ParserReplacement`. -type NodeReplacement = (NodeRange, Option); - -impl NodeRange { - // Converts a range within a parser's tokens to a range within a - // node's tokens beginning at `start_pos`. - // - // For example, imagine a parser with 50 tokens in its token stream, a - // function that spans `ParserRange(20..40)` and an inner attribute within - // that function that spans `ParserRange(30..35)`. We would find the inner - // attribute's range within the function's tokens by subtracting 20, which - // is the position of the function's start token. This gives - // `NodeRange(10..15)`. 
- fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange { - assert!(!parser_range.is_empty()); - assert!(parser_range.start >= start_pos); - NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos)) - } -} - /// Controls how we capture tokens. Capturing can be expensive, /// so we try to avoid performing capturing in cases where /// we will never need an `AttrTokenStream`. @@ -260,104 +210,6 @@ struct CaptureState { seen_attrs: IntervalSet, } -#[derive(Clone, Debug)] -struct TokenTreeCursor { - stream: TokenStream, - /// Points to the current token tree in the stream. In `TokenCursor::curr`, - /// this can be any token tree. In `TokenCursor::stack`, this is always a - /// `TokenTree::Delimited`. - index: usize, -} - -impl TokenTreeCursor { - #[inline] - fn new(stream: TokenStream) -> Self { - TokenTreeCursor { stream, index: 0 } - } - - #[inline] - fn curr(&self) -> Option<&TokenTree> { - self.stream.get(self.index) - } - - fn look_ahead(&self, n: usize) -> Option<&TokenTree> { - self.stream.get(self.index + n) - } - - #[inline] - fn bump(&mut self) { - self.index += 1; - } -} - -/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that -/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b) -/// use this type to emit them as a linear sequence. But a linear sequence is -/// what the parser expects, for the most part. -#[derive(Clone, Debug)] -struct TokenCursor { - // Cursor for the current (innermost) token stream. The index within the - // cursor can point to any token tree in the stream (or one past the end). - // The delimiters for this token stream are found in `self.stack.last()`; - // if that is `None` we are in the outermost token stream which never has - // delimiters. - curr: TokenTreeCursor, - - // Token streams surrounding the current one. The index within each cursor - // always points to a `TokenTree::Delimited`. - stack: Vec, -} - -impl TokenCursor { - fn next(&mut self) -> (Token, Spacing) { - self.inlined_next() - } - - /// This always-inlined version should only be used on hot code paths. - #[inline(always)] - fn inlined_next(&mut self) -> (Token, Spacing) { - loop { - // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix - // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions - // below can be removed. - if let Some(tree) = self.curr.curr() { - match tree { - &TokenTree::Token(token, spacing) => { - debug_assert!(!token.kind.is_delim()); - let res = (token, spacing); - self.curr.bump(); - return res; - } - &TokenTree::Delimited(sp, spacing, delim, ref tts) => { - let trees = TokenTreeCursor::new(tts.clone()); - self.stack.push(mem::replace(&mut self.curr, trees)); - if !delim.skip() { - return (Token::new(delim.as_open_token_kind(), sp.open), spacing.open); - } - // No open delimiter to return; continue on to the next iteration. - } - }; - } else if let Some(parent) = self.stack.pop() { - // We have exhausted this token stream. Move back to its parent token stream. - let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else { - panic!("parent should be Delimited") - }; - self.curr = parent; - self.curr.bump(); // move past the `Delimited` - if !delim.skip() { - return (Token::new(delim.as_close_token_kind(), span.close), spacing.close); - } - // No close delimiter to return; continue on to the next iteration. - } else { - // We have exhausted the outermost token stream. 
The use of - // `Spacing::Alone` is arbitrary and immaterial, because the - // `Eof` token's spacing is never used. - return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); - } - } - } -} - /// A sequence separator. #[derive(Debug)] struct SeqSep<'a> { @@ -1742,26 +1594,6 @@ impl<'a> Parser<'a> { } } -/// A helper struct used when building an `AttrTokenStream` from -/// a `LazyAttrTokenStream`. Both delimiter and non-delimited tokens -/// are stored as `FlatToken::Token`. A vector of `FlatToken`s -/// is then 'parsed' to build up an `AttrTokenStream` with nested -/// `AttrTokenTree::Delimited` tokens. -#[derive(Debug, Clone)] -enum FlatToken { - /// A token - this holds both delimiter (e.g. '{' and '}') - /// and non-delimiter tokens - Token((Token, Spacing)), - /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted - /// directly into the constructed `AttrTokenStream` as an - /// `AttrTokenTree::AttrsTarget`. - AttrsTarget(AttrsTarget), - /// A special 'empty' token that is ignored during the conversion - /// to an `AttrTokenStream`. This is used to simplify the - /// handling of replace ranges. - Empty, -} - // Metavar captures of various kinds. #[derive(Clone, Debug)] pub enum ParseNtResult { -- cgit 1.4.1-3-g733a5 From 298c56f4ba604d3c7025a44fe7bfe1134d6b56d6 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 29 Apr 2025 11:57:27 +1000 Subject: Simplify `LazyAttrTokenStream`. This commit does the following. - Changes it from `Lrc>` to `Lrc`. - Reworks `LazyAttrTokenStreamImpl` as `LazyAttrTokenStreamInner`, which is a two-variant enum. - Removes the `ToAttrTokenStream` trait and the two impls of it. The recursion limit must be increased in some crates otherwise rustdoc aborts. --- compiler/rustc_ast/src/lib.rs | 1 + compiler/rustc_ast/src/mut_visit.rs | 2 +- compiler/rustc_ast/src/tokenstream.rs | 214 +++++++++++++----------- compiler/rustc_attr_parsing/src/lib.rs | 1 + compiler/rustc_builtin_macros/src/lib.rs | 1 + compiler/rustc_codegen_ssa/src/lib.rs | 1 + compiler/rustc_expand/src/config.rs | 8 +- compiler/rustc_hir/src/lib.rs | 1 + compiler/rustc_middle/src/lib.rs | 1 + compiler/rustc_parse/src/lib.rs | 1 + compiler/rustc_parse/src/parser/attr_wrapper.rs | 24 +-- compiler/rustc_resolve/src/lib.rs | 1 + 12 files changed, 138 insertions(+), 118 deletions(-) (limited to 'compiler/rustc_parse/src') diff --git a/compiler/rustc_ast/src/lib.rs b/compiler/rustc_ast/src/lib.rs index bb16b48a641..e572ec99dab 100644 --- a/compiler/rustc_ast/src/lib.rs +++ b/compiler/rustc_ast/src/lib.rs @@ -20,6 +20,7 @@ #![feature(never_type)] #![feature(rustdoc_internals)] #![feature(stmt_expr_attributes)] +#![recursion_limit = "256"] // tidy-alphabetical-end pub mod util { diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs index 6aae2e481a5..28808716700 100644 --- a/compiler/rustc_ast/src/mut_visit.rs +++ b/compiler/rustc_ast/src/mut_visit.rs @@ -835,7 +835,7 @@ fn visit_lazy_tts_opt_mut(vis: &mut T, lazy_tts: Option<&mut Lazy if let Some(lazy_tts) = lazy_tts { let mut tts = lazy_tts.to_attr_token_stream(); visit_attr_tts(vis, &mut tts); - *lazy_tts = LazyAttrTokenStream::new(tts); + *lazy_tts = LazyAttrTokenStream::new_direct(tts); } } } diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 0f63a248fef..4f352e20da2 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -107,25 +107,30 @@ where } } -pub trait ToAttrTokenStream: sync::DynSend + 
sync::DynSync { - fn to_attr_token_stream(&self) -> AttrTokenStream; -} - -impl ToAttrTokenStream for AttrTokenStream { - fn to_attr_token_stream(&self) -> AttrTokenStream { - self.clone() - } -} - -/// A lazy version of [`TokenStream`], which defers creation -/// of an actual `TokenStream` until it is needed. -/// `Box` is here only to reduce the structure size. +/// A lazy version of [`AttrTokenStream`], which defers creation of an actual +/// `AttrTokenStream` until it is needed. #[derive(Clone)] -pub struct LazyAttrTokenStream(Arc>); +pub struct LazyAttrTokenStream(Arc); impl LazyAttrTokenStream { - pub fn new(inner: impl ToAttrTokenStream + 'static) -> LazyAttrTokenStream { - LazyAttrTokenStream(Arc::new(Box::new(inner))) + pub fn new_direct(stream: AttrTokenStream) -> LazyAttrTokenStream { + LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Direct(stream))) + } + + pub fn new_pending( + start_token: (Token, Spacing), + cursor_snapshot: TokenCursor, + num_calls: u32, + break_last_token: u32, + node_replacements: Box<[NodeReplacement]>, + ) -> LazyAttrTokenStream { + LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Pending { + start_token, + cursor_snapshot, + num_calls, + break_last_token, + node_replacements, + })) } pub fn to_attr_token_stream(&self) -> AttrTokenStream { @@ -208,91 +213,109 @@ impl NodeRange { } } -// From a value of this type we can reconstruct the `TokenStream` seen by the -// `f` callback passed to a call to `Parser::collect_tokens`, by -// replaying the getting of the tokens. This saves us producing a `TokenStream` -// if it is never needed, e.g. a captured `macro_rules!` argument that is never -// passed to a proc macro. In practice, token stream creation happens rarely -// compared to calls to `collect_tokens` (see some statistics in #78736) so we -// are doing as little up-front work as possible. -// -// This also makes `Parser` very cheap to clone, since -// there is no intermediate collection buffer to clone. -pub struct LazyAttrTokenStreamImpl { - pub start_token: (Token, Spacing), - pub cursor_snapshot: TokenCursor, - pub num_calls: u32, - pub break_last_token: u32, - pub node_replacements: Box<[NodeReplacement]>, +enum LazyAttrTokenStreamInner { + // The token stream has already been produced. + Direct(AttrTokenStream), + + // From a value of this type we can reconstruct the `TokenStream` seen by + // the `f` callback passed to a call to `Parser::collect_tokens`, by + // replaying the getting of the tokens. This saves us producing a + // `TokenStream` if it is never needed, e.g. a captured `macro_rules!` + // argument that is never passed to a proc macro. In practice, token stream + // creation happens rarely compared to calls to `collect_tokens` (see some + // statistics in #78736) so we are doing as little up-front work as + // possible. + // + // This also makes `Parser` very cheap to clone, since there is no + // intermediate collection buffer to clone. + Pending { + start_token: (Token, Spacing), + cursor_snapshot: TokenCursor, + num_calls: u32, + break_last_token: u32, + node_replacements: Box<[NodeReplacement]>, + }, } -impl ToAttrTokenStream for LazyAttrTokenStreamImpl { +impl LazyAttrTokenStreamInner { fn to_attr_token_stream(&self) -> AttrTokenStream { - // The token produced by the final call to `{,inlined_}next` was not - // actually consumed by the callback. 
The combination of chaining the - // initial token and using `take` produces the desired result - we - // produce an empty `TokenStream` if no calls were made, and omit the - // final token otherwise. - let mut cursor_snapshot = self.cursor_snapshot.clone(); - let tokens = iter::once(FlatToken::Token(self.start_token)) - .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next()))) - .take(self.num_calls as usize); - - if self.node_replacements.is_empty() { - make_attr_token_stream(tokens, self.break_last_token) - } else { - let mut tokens: Vec<_> = tokens.collect(); - let mut node_replacements = self.node_replacements.to_vec(); - node_replacements.sort_by_key(|(range, _)| range.0.start); + match self { + LazyAttrTokenStreamInner::Direct(stream) => stream.clone(), + LazyAttrTokenStreamInner::Pending { + start_token, + cursor_snapshot, + num_calls, + break_last_token, + node_replacements, + } => { + // The token produced by the final call to `{,inlined_}next` was not + // actually consumed by the callback. The combination of chaining the + // initial token and using `take` produces the desired result - we + // produce an empty `TokenStream` if no calls were made, and omit the + // final token otherwise. + let mut cursor_snapshot = cursor_snapshot.clone(); + let tokens = iter::once(FlatToken::Token(*start_token)) + .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next()))) + .take(*num_calls as usize); + + if node_replacements.is_empty() { + make_attr_token_stream(tokens, *break_last_token) + } else { + let mut tokens: Vec<_> = tokens.collect(); + let mut node_replacements = node_replacements.to_vec(); + node_replacements.sort_by_key(|(range, _)| range.0.start); - #[cfg(debug_assertions)] - for [(node_range, tokens), (next_node_range, next_tokens)] in - node_replacements.array_windows() - { - assert!( - node_range.0.end <= next_node_range.0.start - || node_range.0.end >= next_node_range.0.end, - "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", - node_range, - tokens, - next_node_range, - next_tokens, - ); - } + #[cfg(debug_assertions)] + for [(node_range, tokens), (next_node_range, next_tokens)] in + node_replacements.array_windows() + { + assert!( + node_range.0.end <= next_node_range.0.start + || node_range.0.end >= next_node_range.0.end, + "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", + node_range, + tokens, + next_node_range, + next_tokens, + ); + } - // Process the replace ranges, starting from the highest start - // position and working our way back. If have tokens like: - // - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // Then we will generate replace ranges for both - // the `#[cfg(FALSE)] field: bool` and the entire - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // By starting processing from the replace range with the greatest - // start position, we ensure that any (outer) replace range which - // encloses another (inner) replace range will fully overwrite the - // inner range's replacement. - for (node_range, target) in node_replacements.into_iter().rev() { - assert!( - !node_range.0.is_empty(), - "Cannot replace an empty node range: {:?}", - node_range.0 - ); - - // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus - // enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the - // total length of `tokens` constant throughout the replacement process, allowing - // us to do all replacements without adjusting indices. 
- let target_len = target.is_some() as usize; - tokens.splice( - (node_range.0.start as usize)..(node_range.0.end as usize), - target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain( - iter::repeat(FlatToken::Empty).take(node_range.0.len() - target_len), - ), - ); + // Process the replace ranges, starting from the highest start + // position and working our way back. If have tokens like: + // + // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` + // + // Then we will generate replace ranges for both + // the `#[cfg(FALSE)] field: bool` and the entire + // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` + // + // By starting processing from the replace range with the greatest + // start position, we ensure that any (outer) replace range which + // encloses another (inner) replace range will fully overwrite the + // inner range's replacement. + for (node_range, target) in node_replacements.into_iter().rev() { + assert!( + !node_range.0.is_empty(), + "Cannot replace an empty node range: {:?}", + node_range.0 + ); + + // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, + // plus enough `FlatToken::Empty`s to fill up the rest of the range. This + // keeps the total length of `tokens` constant throughout the replacement + // process, allowing us to do all replacements without adjusting indices. + let target_len = target.is_some() as usize; + tokens.splice( + (node_range.0.start as usize)..(node_range.0.end as usize), + target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain( + iter::repeat(FlatToken::Empty) + .take(node_range.0.len() - target_len), + ), + ); + } + make_attr_token_stream(tokens.into_iter(), *break_last_token) + } } - make_attr_token_stream(tokens.into_iter(), self.break_last_token) } } } @@ -1011,6 +1034,7 @@ mod size_asserts { static_assert_size!(AttrTokenStream, 8); static_assert_size!(AttrTokenTree, 32); static_assert_size!(LazyAttrTokenStream, 8); + static_assert_size!(LazyAttrTokenStreamInner, 96); static_assert_size!(Option, 8); // must be small, used in many AST nodes static_assert_size!(TokenStream, 8); static_assert_size!(TokenTree, 32); diff --git a/compiler/rustc_attr_parsing/src/lib.rs b/compiler/rustc_attr_parsing/src/lib.rs index b9692c01e2c..874fccf7ff6 100644 --- a/compiler/rustc_attr_parsing/src/lib.rs +++ b/compiler/rustc_attr_parsing/src/lib.rs @@ -80,6 +80,7 @@ #![cfg_attr(bootstrap, feature(let_chains))] #![doc(rust_logo)] #![feature(rustdoc_internals)] +#![recursion_limit = "256"] // tidy-alphabetical-end #[macro_use] diff --git a/compiler/rustc_builtin_macros/src/lib.rs b/compiler/rustc_builtin_macros/src/lib.rs index 70e817db2a6..c2f5bf0f457 100644 --- a/compiler/rustc_builtin_macros/src/lib.rs +++ b/compiler/rustc_builtin_macros/src/lib.rs @@ -18,6 +18,7 @@ #![feature(rustdoc_internals)] #![feature(string_from_utf8_lossy_owned)] #![feature(try_blocks)] +#![recursion_limit = "256"] // tidy-alphabetical-end extern crate proc_macro; diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index c927aae2c4c..d799bb25c01 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -14,6 +14,7 @@ #![feature(string_from_utf8_lossy_owned)] #![feature(trait_alias)] #![feature(try_blocks)] +#![recursion_limit = "256"] // tidy-alphabetical-end //! This crate contains codegen code that is used by all codegen backends (LLVM and others). 
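A standalone, simplified sketch of the replacement strategy described in the `Pending` arm above (ranges sorted by start, applied from the highest start backwards, each range overwritten by one target marker plus padding so indices stay valid). This is illustrative only, not rustc code: the function name `apply_replacements`, the `String` tokens, and the marker strings are made up, every range is assumed non-empty, and the real code also handles the "delete with no target" case.

use std::iter;
use std::ops::Range;

fn apply_replacements(tokens: &mut Vec<String>, mut ranges: Vec<(Range<usize>, String)>) {
    // Sort by start position, then work backwards from the highest start so an
    // outer (enclosing) range, applied last, fully overwrites any inner one.
    ranges.sort_by_key(|(range, _)| range.start);
    for (range, target) in ranges.into_iter().rev() {
        let len = range.len();
        // One target marker plus padding keeps the overall length constant,
        // so lower indices remain valid for the next replacement.
        let replacement =
            iter::once(target).chain(iter::repeat(String::from("<empty>")).take(len - 1));
        tokens.splice(range, replacement);
    }
}

fn main() {
    // Stand-in for `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`.
    let mut tokens: Vec<String> = (0..10).map(|i| format!("tok{i}")).collect();
    let ranges = vec![
        (0..10, String::from("<outer>")), // the whole struct
        (4..8, String::from("<inner>")),  // the cfg'd field inside it
    ];
    apply_replacements(&mut tokens, ranges);
    // The outer range is applied last, so it fully overwrites the inner one.
    assert_eq!(tokens[0], "<outer>");
    assert!(tokens[1..].iter().all(|t| t == "<empty>"));
    println!("{tokens:?}");
}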
diff --git a/compiler/rustc_expand/src/config.rs b/compiler/rustc_expand/src/config.rs index d2e45d717d9..2df3281568b 100644 --- a/compiler/rustc_expand/src/config.rs +++ b/compiler/rustc_expand/src/config.rs @@ -162,7 +162,7 @@ pub(crate) fn attr_into_trace(mut attr: Attribute, trace_name: Symbol) -> Attrib let NormalAttr { item, tokens } = &mut **normal; item.path.segments[0].ident.name = trace_name; // This makes the trace attributes unobservable to token-based proc macros. - *tokens = Some(LazyAttrTokenStream::new(AttrTokenStream::default())); + *tokens = Some(LazyAttrTokenStream::new_direct(AttrTokenStream::default())); } AttrKind::DocComment(..) => unreachable!(), } @@ -192,7 +192,7 @@ impl<'a> StripUnconfigured<'a> { if self.config_tokens { if let Some(Some(tokens)) = node.tokens_mut() { let attr_stream = tokens.to_attr_token_stream(); - *tokens = LazyAttrTokenStream::new(self.configure_tokens(&attr_stream)); + *tokens = LazyAttrTokenStream::new_direct(self.configure_tokens(&attr_stream)); } } } @@ -223,7 +223,7 @@ impl<'a> StripUnconfigured<'a> { target.attrs.flat_map_in_place(|attr| self.process_cfg_attr(&attr)); if self.in_cfg(&target.attrs) { - target.tokens = LazyAttrTokenStream::new( + target.tokens = LazyAttrTokenStream::new_direct( self.configure_tokens(&target.tokens.to_attr_token_stream()), ); Some(AttrTokenTree::AttrsTarget(target)) @@ -361,7 +361,7 @@ impl<'a> StripUnconfigured<'a> { .to_attr_token_stream(), )); - let tokens = Some(LazyAttrTokenStream::new(AttrTokenStream::new(trees))); + let tokens = Some(LazyAttrTokenStream::new_direct(AttrTokenStream::new(trees))); let attr = ast::attr::mk_attr_from_item( &self.sess.psess.attr_id_generator, item, diff --git a/compiler/rustc_hir/src/lib.rs b/compiler/rustc_hir/src/lib.rs index 32064f96dd6..5533920aee4 100644 --- a/compiler/rustc_hir/src/lib.rs +++ b/compiler/rustc_hir/src/lib.rs @@ -14,6 +14,7 @@ #![feature(never_type)] #![feature(rustc_attrs)] #![feature(variant_count)] +#![recursion_limit = "256"] // tidy-alphabetical-end extern crate self as rustc_hir; diff --git a/compiler/rustc_middle/src/lib.rs b/compiler/rustc_middle/src/lib.rs index df025aeebf0..711036865bb 100644 --- a/compiler/rustc_middle/src/lib.rs +++ b/compiler/rustc_middle/src/lib.rs @@ -61,6 +61,7 @@ #![feature(try_trait_v2_yeet)] #![feature(type_alias_impl_trait)] #![feature(yeet_expr)] +#![recursion_limit = "256"] // tidy-alphabetical-end #[cfg(test)] diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs index 37204702bcb..d06922f1e04 100644 --- a/compiler/rustc_parse/src/lib.rs +++ b/compiler/rustc_parse/src/lib.rs @@ -11,6 +11,7 @@ #![feature(if_let_guard)] #![feature(iter_intersperse)] #![feature(string_from_utf8_lossy_owned)] +#![recursion_limit = "256"] // tidy-alphabetical-end use std::path::{Path, PathBuf}; diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 912045d8835..835226a64af 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -3,8 +3,7 @@ use std::mem; use rustc_ast::token::Token; use rustc_ast::tokenstream::{ - AttrsTarget, LazyAttrTokenStream, LazyAttrTokenStreamImpl, NodeRange, ParserRange, Spacing, - TokenCursor, + AttrsTarget, LazyAttrTokenStream, NodeRange, ParserRange, Spacing, TokenCursor, }; use rustc_ast::{self as ast, AttrVec, Attribute, HasAttrs, HasTokens}; use rustc_data_structures::fx::FxHashSet; @@ -337,13 +336,13 @@ impl<'a> Parser<'a> { // - `attrs`: includes the 
outer and the inner attr. // - `tokens`: lazy tokens for `g` (with its inner attr deleted). - let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl { - start_token: collect_pos.start_token, - cursor_snapshot: collect_pos.cursor_snapshot, + let tokens = LazyAttrTokenStream::new_pending( + collect_pos.start_token, + collect_pos.cursor_snapshot, num_calls, - break_last_token: self.break_last_token, + self.break_last_token, node_replacements, - }); + ); let mut tokens_used = false; // If in "definite capture mode" we need to register a replace range @@ -405,14 +404,3 @@ fn needs_tokens(attrs: &[ast::Attribute]) -> bool { } }) } - -// Some types are used a lot. Make sure they don't unintentionally get bigger. -#[cfg(target_pointer_width = "64")] -mod size_asserts { - use rustc_data_structures::static_assert_size; - - use super::*; - // tidy-alphabetical-start - static_assert_size!(LazyAttrTokenStreamImpl, 96); - // tidy-alphabetical-end -} diff --git a/compiler/rustc_resolve/src/lib.rs b/compiler/rustc_resolve/src/lib.rs index 4a252a7b528..d2da3ac7d86 100644 --- a/compiler/rustc_resolve/src/lib.rs +++ b/compiler/rustc_resolve/src/lib.rs @@ -19,6 +19,7 @@ #![feature(iter_intersperse)] #![feature(rustc_attrs)] #![feature(rustdoc_internals)] +#![recursion_limit = "256"] // tidy-alphabetical-end use std::cell::{Cell, RefCell}; -- cgit 1.4.1-3-g733a5 From 880e6f716d741b4ef827d48e66c45c7887f82aa2 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 29 Apr 2025 12:05:38 +1000 Subject: Use `ThinVec` to shrink `LazyAttrTokenStreamInner`. --- compiler/rustc_ast/src/tokenstream.rs | 7 ++++--- compiler/rustc_parse/src/parser/attr_wrapper.rs | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'compiler/rustc_parse/src') diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs index 4f352e20da2..636c26bcde0 100644 --- a/compiler/rustc_ast/src/tokenstream.rs +++ b/compiler/rustc_ast/src/tokenstream.rs @@ -23,6 +23,7 @@ use rustc_data_structures::sync; use rustc_macros::{Decodable, Encodable, HashStable_Generic}; use rustc_serialize::{Decodable, Encodable}; use rustc_span::{DUMMY_SP, Span, SpanDecoder, SpanEncoder, Symbol, sym}; +use thin_vec::ThinVec; use crate::ast::AttrStyle; use crate::ast_traits::{HasAttrs, HasTokens}; @@ -122,7 +123,7 @@ impl LazyAttrTokenStream { cursor_snapshot: TokenCursor, num_calls: u32, break_last_token: u32, - node_replacements: Box<[NodeReplacement]>, + node_replacements: ThinVec, ) -> LazyAttrTokenStream { LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Pending { start_token, @@ -233,7 +234,7 @@ enum LazyAttrTokenStreamInner { cursor_snapshot: TokenCursor, num_calls: u32, break_last_token: u32, - node_replacements: Box<[NodeReplacement]>, + node_replacements: ThinVec, }, } @@ -1034,7 +1035,7 @@ mod size_asserts { static_assert_size!(AttrTokenStream, 8); static_assert_size!(AttrTokenTree, 32); static_assert_size!(LazyAttrTokenStream, 8); - static_assert_size!(LazyAttrTokenStreamInner, 96); + static_assert_size!(LazyAttrTokenStreamInner, 88); static_assert_size!(Option, 8); // must be small, used in many AST nodes static_assert_size!(TokenStream, 8); static_assert_size!(TokenTree, 32); diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 835226a64af..44fdf146f9c 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -10,6 +10,7 @@ use rustc_data_structures::fx::FxHashSet; use 
rustc_errors::PResult; use rustc_session::parse::ParseSess; use rustc_span::{DUMMY_SP, sym}; +use thin_vec::ThinVec; use super::{Capturing, ForceCollect, Parser, Trailing}; @@ -294,10 +295,10 @@ impl<'a> Parser<'a> { // This is hot enough for `deep-vector` that checking the conditions for an empty iterator // is measurably faster than actually executing the iterator. - let node_replacements: Box<[_]> = if parser_replacements_start == parser_replacements_end + let node_replacements = if parser_replacements_start == parser_replacements_end && inner_attr_parser_replacements.is_empty() { - Box::new([]) + ThinVec::new() } else { // Grab any replace ranges that occur *inside* the current AST node. Convert them // from `ParserRange` form to `NodeRange` form. We will perform the actual -- cgit 1.4.1-3-g733a5 From 2b92f9fb98e16e3ed85166e14936be0c3f11c945 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Tue, 29 Apr 2025 18:05:45 -0700 Subject: Parser: Document restrictions I had trouble easily understanding what these various flags do. This is my attempt to try to explain what these do. --- compiler/rustc_parse/src/parser/mod.rs | 49 ++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'compiler/rustc_parse/src') diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 48df8b59d55..db7499dcce9 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -61,13 +61,62 @@ mod mut_visit { } bitflags::bitflags! { + /// Restrictions applied while parsing. + /// + /// The parser maintains a bitset of restrictions it will honor while + /// parsing. This is essentially used as a way of tracking state of what + /// is being parsed and to change behavior based on that. #[derive(Clone, Copy, Debug)] struct Restrictions: u8 { + /// Restricts expressions for use in statement position. + /// + /// When expressions are used in various places, like statements or + /// match arms, this is used to stop parsing once certain tokens are + /// reached. + /// + /// For example, `if true {} & 1` with `STMT_EXPR` in effect is parsed + /// as two separate expression statements (`if` and a reference to 1). + /// Otherwise it is parsed as a bitwise AND where `if` is on the left + /// and 1 is on the right. const STMT_EXPR = 1 << 0; + /// Do not allow struct literals. + /// + /// There are several places in the grammar where we don't want to + /// allow struct literals because they can require lookahead, or + /// otherwise could be ambiguous or cause confusion. For example, + /// `if Foo {} {}` isn't clear if it is `Foo{}` struct literal, or + /// just `Foo` is the condition, followed by a consequent block, + /// followed by an empty block. + /// + /// See [RFC 92](https://rust-lang.github.io/rfcs/0092-struct-grammar.html). const NO_STRUCT_LITERAL = 1 << 1; + /// Used to provide better error messages for const generic arguments. + /// + /// An un-braced const generic argument is limited to a very small + /// subset of expressions. This is used to detect the situation where + /// an expression outside of that subset is used, and to suggest to + /// wrap the expression in braces. const CONST_EXPR = 1 << 2; + /// Allows `let` expressions. + /// + /// `let pattern = scrutinee` is parsed as an expression, but it is + /// only allowed in let chains (`if` and `while` conditions). 
+ /// Otherwise it is not an expression (note that `let` in statement + /// positions is treated as a `StmtKind::Let` statement, which has a + /// slightly different grammar). const ALLOW_LET = 1 << 3; + /// Used to detect a missing `=>` in a match guard. + /// + /// This is used for error handling in a match guard to give a better + /// error message if the `=>` is missing. It is set when parsing the + /// guard expression. const IN_IF_GUARD = 1 << 4; + /// Used to detect the incorrect use of expressions in patterns. + /// + /// This is used for error handling while parsing a pattern. During + /// error recovery, this will be set to try to parse the pattern as an + /// expression, but halts parsing the expression when reaching certain + /// tokens like `=`. const IS_PAT = 1 << 5; } } -- cgit 1.4.1-3-g733a5 From 6668d13de27be6382f06cbdc253d9a48f62e4c34 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Tue, 29 Apr 2025 09:59:17 +0300 Subject: ast: Remove token visiting from AST visitor It's no longer necessary after the removal of nonterminal tokens in #124141. --- compiler/rustc_ast/src/mut_visit.rs | 139 +++------------------ compiler/rustc_expand/src/mbe/transcribe.rs | 32 ++--- compiler/rustc_parse/src/parser/mod.rs | 4 - compiler/rustc_parse/src/parser/mut_visit/tests.rs | 65 ---------- compiler/rustc_parse/src/parser/tests.rs | 6 - tests/ui-fulldeps/auxiliary/parser.rs | 2 - 6 files changed, 32 insertions(+), 216 deletions(-) delete mode 100644 compiler/rustc_parse/src/parser/mut_visit/tests.rs (limited to 'compiler/rustc_parse/src') diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs index bef04dc4048..e49886721e3 100644 --- a/compiler/rustc_ast/src/mut_visit.rs +++ b/compiler/rustc_ast/src/mut_visit.rs @@ -9,7 +9,6 @@ use std::ops::DerefMut; use std::panic; -use std::sync::Arc; use rustc_data_structures::flat_map_in_place::FlatMapInPlace; use rustc_data_structures::stack::ensure_sufficient_stack; @@ -20,7 +19,6 @@ use thin_vec::ThinVec; use crate::ast::*; use crate::ptr::P; -use crate::token::{self, Token}; use crate::tokenstream::*; use crate::visit::{AssocCtxt, BoundKind, FnCtxt}; @@ -48,11 +46,6 @@ pub trait WalkItemKind { } pub trait MutVisitor: Sized { - /// Mutable token visiting only exists for the `macro_rules` token marker and should not be - /// used otherwise. Token visitor would be entirely separate from the regular visitor if - /// the marker didn't have to visit AST fragments in nonterminal tokens. - const VISIT_TOKENS: bool = false; - // Methods in this trait have one of three forms: // // fn visit_t(&mut self, t: &mut T); // common @@ -360,6 +353,8 @@ pub trait MutVisitor: Sized { // Do nothing. } + // Span visiting is no longer used, but we keep it for now, + // in case it's needed for something like #127241. fn visit_span(&mut self, _sp: &mut Span) { // Do nothing. } @@ -473,12 +468,8 @@ fn visit_attr_args(vis: &mut T, args: &mut AttrArgs) { // No `noop_` prefix because there isn't a corresponding method in `MutVisitor`. 
fn visit_delim_args(vis: &mut T, args: &mut DelimArgs) { - let DelimArgs { dspan, delim: _, tokens } = args; - visit_tts(vis, tokens); - visit_delim_span(vis, dspan); -} - -pub fn visit_delim_span(vis: &mut T, DelimSpan { open, close }: &mut DelimSpan) { + let DelimArgs { dspan, delim: _, tokens: _ } = args; + let DelimSpan { open, close } = dspan; vis.visit_span(open); vis.visit_span(close); } @@ -552,7 +543,7 @@ fn walk_assoc_item_constraint( } pub fn walk_ty(vis: &mut T, ty: &mut P) { - let Ty { id, kind, span, tokens } = ty.deref_mut(); + let Ty { id, kind, span, tokens: _ } = ty.deref_mut(); vis.visit_id(id); match kind { TyKind::Err(_guar) => {} @@ -600,12 +591,11 @@ pub fn walk_ty(vis: &mut T, ty: &mut P) { } TyKind::MacCall(mac) => vis.visit_mac_call(mac), } - visit_lazy_tts(vis, tokens); vis.visit_span(span); } pub fn walk_ty_pat(vis: &mut T, ty: &mut P) { - let TyPat { id, kind, span, tokens } = ty.deref_mut(); + let TyPat { id, kind, span, tokens: _ } = ty.deref_mut(); vis.visit_id(id); match kind { TyPatKind::Range(start, end, _include_end) => { @@ -615,7 +605,6 @@ pub fn walk_ty_pat(vis: &mut T, ty: &mut P) { TyPatKind::Or(variants) => visit_thin_vec(variants, |p| vis.visit_ty_pat(p)), TyPatKind::Err(_) => {} } - visit_lazy_tts(vis, tokens); vis.visit_span(span); } @@ -655,11 +644,10 @@ fn walk_path_segment(vis: &mut T, segment: &mut PathSegment) { visit_opt(args, |args| vis.visit_generic_args(args)); } -fn walk_path(vis: &mut T, Path { segments, span, tokens }: &mut Path) { +fn walk_path(vis: &mut T, Path { segments, span, tokens: _ }: &mut Path) { for segment in segments { vis.visit_path_segment(segment); } - visit_lazy_tts(vis, tokens); vis.visit_span(span); } @@ -705,7 +693,7 @@ fn walk_parenthesized_parameter_data(vis: &mut T, args: &mut Pare } fn walk_local(vis: &mut T, local: &mut P) { - let Local { id, super_, pat, ty, kind, span, colon_sp, attrs, tokens } = local.deref_mut(); + let Local { id, super_, pat, ty, kind, span, colon_sp, attrs, tokens: _ } = local.deref_mut(); visit_opt(super_, |sp| vis.visit_span(sp)); vis.visit_id(id); visit_attrs(vis, attrs); @@ -721,7 +709,6 @@ fn walk_local(vis: &mut T, local: &mut P) { vis.visit_block(els); } } - visit_lazy_tts(vis, tokens); visit_opt(colon_sp, |sp| vis.visit_span(sp)); vis.visit_span(span); } @@ -730,14 +717,10 @@ fn walk_attribute(vis: &mut T, attr: &mut Attribute) { let Attribute { kind, id: _, style: _, span } = attr; match kind { AttrKind::Normal(normal) => { - let NormalAttr { - item: AttrItem { unsafety: _, path, args, tokens }, - tokens: attr_tokens, - } = &mut **normal; + let NormalAttr { item: AttrItem { unsafety: _, path, args, tokens: _ }, tokens: _ } = + &mut **normal; vis.visit_path(path); visit_attr_args(vis, args); - visit_lazy_tts(vis, tokens); - visit_lazy_tts(vis, attr_tokens); } AttrKind::DocComment(_kind, _sym) => {} } @@ -786,90 +769,6 @@ pub fn walk_flat_map_param(vis: &mut T, mut param: Param) -> Smal smallvec![param] } -// No `noop_` prefix because there isn't a corresponding method in `MutVisitor`. -fn visit_attr_tt(vis: &mut T, tt: &mut AttrTokenTree) { - match tt { - AttrTokenTree::Token(token, _spacing) => { - visit_token(vis, token); - } - AttrTokenTree::Delimited(dspan, _spacing, _delim, tts) => { - visit_attr_tts(vis, tts); - visit_delim_span(vis, dspan); - } - AttrTokenTree::AttrsTarget(AttrsTarget { attrs, tokens }) => { - visit_attrs(vis, attrs); - visit_lazy_tts_opt_mut(vis, Some(tokens)); - } - } -} - -// No `noop_` prefix because there isn't a corresponding method in `MutVisitor`. 
-fn visit_tt(vis: &mut T, tt: &mut TokenTree) { - match tt { - TokenTree::Token(token, _spacing) => { - visit_token(vis, token); - } - TokenTree::Delimited(dspan, _spacing, _delim, tts) => { - visit_tts(vis, tts); - visit_delim_span(vis, dspan); - } - } -} - -// No `noop_` prefix because there isn't a corresponding method in `MutVisitor`. -fn visit_tts(vis: &mut T, TokenStream(tts): &mut TokenStream) { - if T::VISIT_TOKENS && !tts.is_empty() { - let tts = Arc::make_mut(tts); - visit_vec(tts, |tree| visit_tt(vis, tree)); - } -} - -fn visit_attr_tts(vis: &mut T, AttrTokenStream(tts): &mut AttrTokenStream) { - if T::VISIT_TOKENS && !tts.is_empty() { - let tts = Arc::make_mut(tts); - visit_vec(tts, |tree| visit_attr_tt(vis, tree)); - } -} - -fn visit_lazy_tts_opt_mut(vis: &mut T, lazy_tts: Option<&mut LazyAttrTokenStream>) { - if T::VISIT_TOKENS { - if let Some(lazy_tts) = lazy_tts { - let mut tts = lazy_tts.to_attr_token_stream(); - visit_attr_tts(vis, &mut tts); - *lazy_tts = LazyAttrTokenStream::new_direct(tts); - } - } -} - -fn visit_lazy_tts(vis: &mut T, lazy_tts: &mut Option) { - visit_lazy_tts_opt_mut(vis, lazy_tts.as_mut()); -} - -/// Applies ident visitor if it's an ident. In practice this is not actually -/// used by specific visitors right now, but there's a test below checking that -/// it works. -// No `noop_` prefix because there isn't a corresponding method in `MutVisitor`. -pub fn visit_token(vis: &mut T, t: &mut Token) { - let Token { kind, span } = t; - match kind { - token::Ident(name, _is_raw) | token::Lifetime(name, _is_raw) => { - let mut ident = Ident::new(*name, *span); - vis.visit_ident(&mut ident); - *name = ident.name; - *span = ident.span; - return; // Avoid visiting the span for the second time. - } - token::NtIdent(ident, _is_raw) => { - vis.visit_ident(ident); - } - token::NtLifetime(ident, _is_raw) => { - vis.visit_ident(ident); - } - _ => {} - } - vis.visit_span(span); -} - // No `noop_` prefix because there isn't a corresponding method in `MutVisitor`. 
fn visit_defaultness(vis: &mut T, defaultness: &mut Defaultness) { match defaultness { @@ -1188,10 +1087,9 @@ fn walk_mt(vis: &mut T, MutTy { ty, mutbl: _ }: &mut MutTy) { } pub fn walk_block(vis: &mut T, block: &mut P) { - let Block { id, stmts, rules: _, span, tokens } = block.deref_mut(); + let Block { id, stmts, rules: _, span, tokens: _ } = block.deref_mut(); vis.visit_id(id); stmts.flat_map_in_place(|stmt| vis.flat_map_stmt(stmt)); - visit_lazy_tts(vis, tokens); vis.visit_span(span); } @@ -1472,12 +1370,11 @@ fn walk_item_ctxt( item: &mut P>, ctxt: K::Ctxt, ) { - let Item { attrs, id, kind, vis, span, tokens } = item.deref_mut(); + let Item { attrs, id, kind, vis, span, tokens: _ } = item.deref_mut(); visitor.visit_id(id); visit_attrs(visitor, attrs); visitor.visit_vis(vis); kind.walk(*span, *id, vis, ctxt, visitor); - visit_lazy_tts(visitor, tokens); visitor.visit_span(span); } @@ -1551,7 +1448,7 @@ impl WalkItemKind for ForeignItemKind { } pub fn walk_pat(vis: &mut T, pat: &mut P) { - let Pat { id, kind, span, tokens } = pat.deref_mut(); + let Pat { id, kind, span, tokens: _ } = pat.deref_mut(); vis.visit_id(id); match kind { PatKind::Err(_guar) => {} @@ -1593,7 +1490,6 @@ pub fn walk_pat(vis: &mut T, pat: &mut P) { PatKind::Paren(inner) => vis.visit_pat(inner), PatKind::MacCall(mac) => vis.visit_mac_call(mac), } - visit_lazy_tts(vis, tokens); vis.visit_span(span); } @@ -1657,7 +1553,7 @@ fn walk_format_args(vis: &mut T, fmt: &mut FormatArgs) { vis.visit_span(span); } -pub fn walk_expr(vis: &mut T, Expr { kind, id, span, attrs, tokens }: &mut Expr) { +pub fn walk_expr(vis: &mut T, Expr { kind, id, span, attrs, tokens: _ }: &mut Expr) { vis.visit_id(id); visit_attrs(vis, attrs); match kind { @@ -1848,7 +1744,6 @@ pub fn walk_expr(vis: &mut T, Expr { kind, id, span, attrs, token ExprKind::Err(_guar) => {} ExprKind::Dummy => {} } - visit_lazy_tts(vis, tokens); vis.visit_span(span); } @@ -1890,17 +1785,16 @@ fn walk_flat_map_stmt_kind(vis: &mut T, kind: StmtKind) -> SmallV StmtKind::Semi(expr) => vis.filter_map_expr(expr).into_iter().map(StmtKind::Semi).collect(), StmtKind::Empty => smallvec![StmtKind::Empty], StmtKind::MacCall(mut mac) => { - let MacCallStmt { mac: mac_, style: _, attrs, tokens } = mac.deref_mut(); + let MacCallStmt { mac: mac_, style: _, attrs, tokens: _ } = mac.deref_mut(); visit_attrs(vis, attrs); vis.visit_mac_call(mac_); - visit_lazy_tts(vis, tokens); smallvec![StmtKind::MacCall(mac)] } } } fn walk_vis(vis: &mut T, visibility: &mut Visibility) { - let Visibility { kind, span, tokens } = visibility; + let Visibility { kind, span, tokens: _ } = visibility; match kind { VisibilityKind::Public | VisibilityKind::Inherited => {} VisibilityKind::Restricted { path, id, shorthand: _ } => { @@ -1908,7 +1802,6 @@ fn walk_vis(vis: &mut T, visibility: &mut Visibility) { vis.visit_path(path); } } - visit_lazy_tts(vis, tokens); vis.visit_span(span); } diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index 39186319b1c..2d3fd7702da 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -1,6 +1,5 @@ use std::mem; -use rustc_ast::mut_visit::{self, MutVisitor}; use rustc_ast::token::{ self, Delimiter, IdentIsRaw, InvisibleOrigin, Lit, LitKind, MetaVarKind, Token, TokenKind, }; @@ -29,10 +28,8 @@ use crate::mbe::{self, KleeneOp, MetaVarExpr}; // A Marker adds the given mark to the syntax context. 
struct Marker(LocalExpnId, Transparency, FxHashMap); -impl MutVisitor for Marker { - const VISIT_TOKENS: bool = true; - - fn visit_span(&mut self, span: &mut Span) { +impl Marker { + fn mark_span(&mut self, span: &mut Span) { // `apply_mark` is a relatively expensive operation, both due to taking hygiene lock, and // by itself. All tokens in a macro body typically have the same syntactic context, unless // it's some advanced case with macro-generated macros. So if we cache the marked version @@ -292,7 +289,7 @@ pub(super) fn transcribe<'a>( // Emit as a token stream within `Delimiter::Invisible` to maintain // parsing priorities. - marker.visit_span(&mut sp); + marker.mark_span(&mut sp); with_metavar_spans(|mspans| mspans.insert(mk_span, sp)); // Both the open delim and close delim get the same span, which covers the // `$foo` in the decl macro RHS. @@ -312,13 +309,13 @@ pub(super) fn transcribe<'a>( maybe_use_metavar_location(psess, &stack, sp, tt, &mut marker) } MatchedSingle(ParseNtResult::Ident(ident, is_raw)) => { - marker.visit_span(&mut sp); + marker.mark_span(&mut sp); with_metavar_spans(|mspans| mspans.insert(ident.span, sp)); let kind = token::NtIdent(*ident, *is_raw); TokenTree::token_alone(kind, sp) } MatchedSingle(ParseNtResult::Lifetime(ident, is_raw)) => { - marker.visit_span(&mut sp); + marker.mark_span(&mut sp); with_metavar_spans(|mspans| mspans.insert(ident.span, sp)); let kind = token::NtLifetime(*ident, *is_raw); TokenTree::token_alone(kind, sp) @@ -400,8 +397,8 @@ pub(super) fn transcribe<'a>( } else { // If we aren't able to match the meta-var, we push it back into the result but // with modified syntax context. (I believe this supports nested macros). - marker.visit_span(&mut sp); - marker.visit_ident(&mut original_ident); + marker.mark_span(&mut sp); + marker.mark_span(&mut original_ident.span); result.push(TokenTree::token_joint_hidden(token::Dollar, sp)); result.push(TokenTree::Token( Token::from_ast_ident(original_ident), @@ -430,16 +427,19 @@ pub(super) fn transcribe<'a>( // jump back out of the Delimited, pop the result_stack and add the new results back to // the previous results (from outside the Delimited). &mbe::TokenTree::Delimited(mut span, ref spacing, ref delimited) => { - mut_visit::visit_delim_span(&mut marker, &mut span); + marker.mark_span(&mut span.open); + marker.mark_span(&mut span.close); stack.push(Frame::new_delimited(delimited, span, *spacing)); result_stack.push(mem::take(&mut result)); } // Nothing much to do here. Just push the token to the result, being careful to // preserve syntax context. - mbe::TokenTree::Token(token) => { - let mut token = *token; - mut_visit::visit_token(&mut marker, &mut token); + &mbe::TokenTree::Token(mut token) => { + marker.mark_span(&mut token.span); + if let token::NtIdent(ident, _) | token::NtLifetime(ident, _) = &mut token.kind { + marker.mark_span(&mut ident.span); + } let tt = TokenTree::Token(token, Spacing::Alone); result.push(tt); } @@ -504,7 +504,7 @@ fn maybe_use_metavar_location( return orig_tt.clone(); } - marker.visit_span(&mut metavar_span); + marker.mark_span(&mut metavar_span); let no_collision = match orig_tt { TokenTree::Token(token, ..) 
=> { with_metavar_spans(|mspans| mspans.insert(token.span, metavar_span)) @@ -774,7 +774,7 @@ fn transcribe_metavar_expr<'a>( ) -> PResult<'a, ()> { let mut visited_span = || { let mut span = sp.entire(); - marker.visit_span(&mut span); + marker.mark_span(&mut span); span }; match *expr { diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 5500fba58a5..1c542ebbebd 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -56,10 +56,6 @@ mod tests; mod tokenstream { mod tests; } -#[cfg(test)] -mod mut_visit { - mod tests; -} bitflags::bitflags! { #[derive(Clone, Copy, Debug)] diff --git a/compiler/rustc_parse/src/parser/mut_visit/tests.rs b/compiler/rustc_parse/src/parser/mut_visit/tests.rs deleted file mode 100644 index 46c678c3902..00000000000 --- a/compiler/rustc_parse/src/parser/mut_visit/tests.rs +++ /dev/null @@ -1,65 +0,0 @@ -use rustc_ast as ast; -use rustc_ast::mut_visit::MutVisitor; -use rustc_ast_pretty::pprust; -use rustc_span::{Ident, create_default_session_globals_then}; - -use crate::parser::tests::{matches_codepattern, string_to_crate}; - -// This version doesn't care about getting comments or doc-strings in. -fn print_crate_items(krate: &ast::Crate) -> String { - krate.items.iter().map(|i| pprust::item_to_string(i)).collect::>().join(" ") -} - -// Change every identifier to "zz". -struct ToZzIdentMutVisitor; - -impl MutVisitor for ToZzIdentMutVisitor { - const VISIT_TOKENS: bool = true; - - fn visit_ident(&mut self, ident: &mut Ident) { - *ident = Ident::from_str("zz"); - } -} - -macro_rules! assert_matches_codepattern { - ($a:expr , $b:expr) => {{ - let a_val = $a; - let b_val = $b; - if !matches_codepattern(&a_val, &b_val) { - panic!("expected args satisfying `matches_codepattern`, got {} and {}", a_val, b_val); - } - }}; -} - -// Make sure idents get transformed everywhere. -#[test] -fn ident_transformation() { - create_default_session_globals_then(|| { - let mut zz_visitor = ToZzIdentMutVisitor; - let mut krate = - string_to_crate("#[a] mod b {fn c (d : e, f : g) {h!(i,j,k);l;m}}".to_string()); - zz_visitor.visit_crate(&mut krate); - assert_matches_codepattern!( - print_crate_items(&krate), - "#[zz]mod zz{fn zz(zz:zz,zz:zz){zz!(zz,zz,zz);zz;zz}}".to_string() - ); - }) -} - -// Make sure idents get transformed even inside macro defs. -#[test] -fn ident_transformation_in_defs() { - create_default_session_globals_then(|| { - let mut zz_visitor = ToZzIdentMutVisitor; - let mut krate = string_to_crate( - "macro_rules! a {(b $c:expr $(d $e:token)f+ => \ - (g $(d $d $e)+))} " - .to_string(), - ); - zz_visitor.visit_crate(&mut krate); - assert_matches_codepattern!( - print_crate_items(&krate), - "macro_rules! zz{(zz$zz:zz$(zz $zz:zz)zz+=>(zz$(zz$zz$zz)+))}".to_string() - ); - }) -} diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs index 8285070839a..2a44c90abc1 100644 --- a/compiler/rustc_parse/src/parser/tests.rs +++ b/compiler/rustc_parse/src/parser/tests.rs @@ -95,12 +95,6 @@ pub(crate) fn string_to_stream(source_str: String) -> TokenStream { )) } -/// Parses a string, returns a crate. -pub(crate) fn string_to_crate(source_str: String) -> ast::Crate { - let psess = psess(); - with_error_checking_parse(source_str, &psess, |p| p.parse_crate_mod()) -} - /// Does the given string match the pattern? whitespace in the first string /// may be deleted or replaced with other whitespace to match the pattern. 
/// This function is relatively Unicode-ignorant; fortunately, the careful design
diff --git a/tests/ui-fulldeps/auxiliary/parser.rs b/tests/ui-fulldeps/auxiliary/parser.rs
index 4ea0d814b1f..be51bd29008 100644
--- a/tests/ui-fulldeps/auxiliary/parser.rs
+++ b/tests/ui-fulldeps/auxiliary/parser.rs
@@ -39,8 +39,6 @@ pub fn parse_expr(psess: &ParseSess, source_code: &str) -> Option<P<Expr>> {
struct Normalize;

impl MutVisitor for Normalize {
-    const VISIT_TOKENS: bool = true;
-
    fn visit_id(&mut self, id: &mut NodeId) {
        *id = DUMMY_NODE_ID;
    }
-- cgit 1.4.1-3-g733a5
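A small standalone illustration of the `STMT_EXPR` behaviour documented in the fourth patch above. This is ordinary Rust, not part of the patches; the function names are made up, and the second function parenthesizes the `if` so it also type-checks (unparenthesized, `if true {} & 1` in expression position parses as `(if true {}) & 1`, which then fails type checking because `if true {}` has type `()`).

fn statement_position() {
    // In statement position (STMT_EXPR set) the statement ends after the `if`
    // block, so this body is two separate statements: `if true {}` and `&1`.
    if true {}
    & 1;
}

fn expression_position() -> i32 {
    // In expression position the same token sequence would instead be read as a
    // single bitwise-AND with the `if` expression on the left.
    (if true { 1 } else { 0 }) & 1
}

fn main() {
    statement_position();
    println!("{}", expression_position());
}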