diff options
Diffstat (limited to 'compiler/rustc_parse/src')
| -rw-r--r-- | compiler/rustc_parse/src/lib.rs | 1 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/attr.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/attr_wrapper.rs | 169 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 176 |
4 files changed, 12 insertions, 338 deletions
diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs index e73d68e2037..37204702bcb 100644 --- a/compiler/rustc_parse/src/lib.rs +++ b/compiler/rustc_parse/src/lib.rs @@ -5,7 +5,6 @@ #![allow(rustc::diagnostic_outside_of_impl)] #![allow(rustc::untranslatable_diagnostic)] #![cfg_attr(bootstrap, feature(let_chains))] -#![feature(array_windows)] #![feature(assert_matches)] #![feature(box_patterns)] #![feature(debug_closure_helpers)] diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs index 53614049f08..41d3889c448 100644 --- a/compiler/rustc_parse/src/parser/attr.rs +++ b/compiler/rustc_parse/src/parser/attr.rs @@ -1,5 +1,6 @@ use rustc_ast as ast; use rustc_ast::token::{self, MetaVarKind}; +use rustc_ast::tokenstream::ParserRange; use rustc_ast::{Attribute, attr}; use rustc_errors::codes::*; use rustc_errors::{Diag, PResult}; @@ -8,8 +9,7 @@ use thin_vec::ThinVec; use tracing::debug; use super::{ - AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle, Trailing, - UsePreAttrPos, + AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, PathStyle, Trailing, UsePreAttrPos, }; use crate::{errors, exp, fluent_generated as fluent}; diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index 6061c9cb485..912045d8835 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -1,21 +1,18 @@ use std::borrow::Cow; -use std::{iter, mem}; +use std::mem; -use rustc_ast::token::{Delimiter, Token}; +use rustc_ast::token::Token; use rustc_ast::tokenstream::{ - AttrTokenStream, AttrTokenTree, AttrsTarget, DelimSpacing, DelimSpan, LazyAttrTokenStream, - Spacing, ToAttrTokenStream, + AttrsTarget, LazyAttrTokenStream, LazyAttrTokenStreamImpl, NodeRange, ParserRange, Spacing, + TokenCursor, }; use rustc_ast::{self as ast, AttrVec, Attribute, HasAttrs, HasTokens}; use rustc_data_structures::fx::FxHashSet; use rustc_errors::PResult; use rustc_session::parse::ParseSess; -use rustc_span::{DUMMY_SP, Span, sym}; +use rustc_span::{DUMMY_SP, sym}; -use super::{ - Capturing, FlatToken, ForceCollect, NodeRange, NodeReplacement, Parser, ParserRange, - TokenCursor, Trailing, -}; +use super::{Capturing, ForceCollect, Parser, Trailing}; // When collecting tokens, this fully captures the start point. Usually its // just after outer attributes, but occasionally it's before. @@ -94,95 +91,6 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool { }) } -// From a value of this type we can reconstruct the `TokenStream` seen by the -// `f` callback passed to a call to `Parser::collect_tokens`, by -// replaying the getting of the tokens. This saves us producing a `TokenStream` -// if it is never needed, e.g. a captured `macro_rules!` argument that is never -// passed to a proc macro. In practice, token stream creation happens rarely -// compared to calls to `collect_tokens` (see some statistics in #78736) so we -// are doing as little up-front work as possible. -// -// This also makes `Parser` very cheap to clone, since -// there is no intermediate collection buffer to clone. -struct LazyAttrTokenStreamImpl { - start_token: (Token, Spacing), - cursor_snapshot: TokenCursor, - num_calls: u32, - break_last_token: u32, - node_replacements: Box<[NodeReplacement]>, -} - -impl ToAttrTokenStream for LazyAttrTokenStreamImpl { - fn to_attr_token_stream(&self) -> AttrTokenStream { - // The token produced by the final call to `{,inlined_}next` was not - // actually consumed by the callback. The combination of chaining the - // initial token and using `take` produces the desired result - we - // produce an empty `TokenStream` if no calls were made, and omit the - // final token otherwise. - let mut cursor_snapshot = self.cursor_snapshot.clone(); - let tokens = iter::once(FlatToken::Token(self.start_token)) - .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next()))) - .take(self.num_calls as usize); - - if self.node_replacements.is_empty() { - make_attr_token_stream(tokens, self.break_last_token) - } else { - let mut tokens: Vec<_> = tokens.collect(); - let mut node_replacements = self.node_replacements.to_vec(); - node_replacements.sort_by_key(|(range, _)| range.0.start); - - #[cfg(debug_assertions)] - for [(node_range, tokens), (next_node_range, next_tokens)] in - node_replacements.array_windows() - { - assert!( - node_range.0.end <= next_node_range.0.start - || node_range.0.end >= next_node_range.0.end, - "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})", - node_range, - tokens, - next_node_range, - next_tokens, - ); - } - - // Process the replace ranges, starting from the highest start - // position and working our way back. If have tokens like: - // - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // Then we will generate replace ranges for both - // the `#[cfg(FALSE)] field: bool` and the entire - // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }` - // - // By starting processing from the replace range with the greatest - // start position, we ensure that any (outer) replace range which - // encloses another (inner) replace range will fully overwrite the - // inner range's replacement. - for (node_range, target) in node_replacements.into_iter().rev() { - assert!( - !node_range.0.is_empty(), - "Cannot replace an empty node range: {:?}", - node_range.0 - ); - - // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus - // enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the - // total length of `tokens` constant throughout the replacement process, allowing - // us to do all replacements without adjusting indices. - let target_len = target.is_some() as usize; - tokens.splice( - (node_range.0.start as usize)..(node_range.0.end as usize), - target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain( - iter::repeat(FlatToken::Empty).take(node_range.0.len() - target_len), - ), - ); - } - make_attr_token_stream(tokens.into_iter(), self.break_last_token) - } - } -} - impl<'a> Parser<'a> { pub(super) fn collect_pos(&self) -> CollectPos { CollectPos { @@ -483,71 +391,6 @@ impl<'a> Parser<'a> { } } -/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an -/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and -/// close delims. -fn make_attr_token_stream( - iter: impl Iterator<Item = FlatToken>, - break_last_token: u32, -) -> AttrTokenStream { - #[derive(Debug)] - struct FrameData { - // This is `None` for the first frame, `Some` for all others. - open_delim_sp: Option<(Delimiter, Span, Spacing)>, - inner: Vec<AttrTokenTree>, - } - // The stack always has at least one element. Storing it separately makes for shorter code. - let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] }; - let mut stack_rest = vec![]; - for flat_token in iter { - match flat_token { - FlatToken::Token((token @ Token { kind, span }, spacing)) => { - if let Some(delim) = kind.open_delim() { - stack_rest.push(mem::replace( - &mut stack_top, - FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] }, - )); - } else if let Some(delim) = kind.close_delim() { - let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap()); - let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap(); - assert!( - open_delim.eq_ignoring_invisible_origin(&delim), - "Mismatched open/close delims: open={open_delim:?} close={span:?}" - ); - let dspan = DelimSpan::from_pair(open_sp, span); - let dspacing = DelimSpacing::new(open_spacing, spacing); - let stream = AttrTokenStream::new(frame_data.inner); - let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream); - stack_top.inner.push(delimited); - } else { - stack_top.inner.push(AttrTokenTree::Token(token, spacing)) - } - } - FlatToken::AttrsTarget(target) => { - stack_top.inner.push(AttrTokenTree::AttrsTarget(target)) - } - FlatToken::Empty => {} - } - } - - if break_last_token > 0 { - let last_token = stack_top.inner.pop().unwrap(); - if let AttrTokenTree::Token(last_token, spacing) = last_token { - let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap(); - - // Tokens are always ASCII chars, so we can use byte arithmetic here. - let mut first_span = last_token.span.shrink_to_lo(); - first_span = - first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token)); - - stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing)); - } else { - panic!("Unexpected last token {last_token:?}") - } - } - AttrTokenStream::new(stack_top.inner) -} - /// Tokens are needed if: /// - any non-single-segment attributes (other than doc comments) are present, /// e.g. `rustfmt::skip`; or diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 48df8b59d55..5500fba58a5 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -12,7 +12,6 @@ pub mod token_type; mod ty; use std::assert_matches::debug_assert_matches; -use std::ops::Range; use std::{fmt, mem, slice}; use attr_wrapper::{AttrWrapper, UsePreAttrPos}; @@ -25,7 +24,9 @@ use rustc_ast::ptr::P; use rustc_ast::token::{ self, IdentIsRaw, InvisibleOrigin, MetaVarKind, NtExprKind, NtPatKind, Token, TokenKind, }; -use rustc_ast::tokenstream::{AttrsTarget, Spacing, TokenStream, TokenTree}; +use rustc_ast::tokenstream::{ + ParserRange, ParserReplacement, Spacing, TokenCursor, TokenStream, TokenTree, TokenTreeCursor, +}; use rustc_ast::util::case::Case; use rustc_ast::{ self as ast, AnonConst, AttrArgs, AttrId, ByRef, Const, CoroutineKind, DUMMY_NODE_ID, @@ -37,7 +38,7 @@ use rustc_data_structures::fx::FxHashMap; use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult}; use rustc_index::interval::IntervalSet; use rustc_session::parse::ParseSess; -use rustc_span::{DUMMY_SP, Ident, Span, Symbol, kw, sym}; +use rustc_span::{Ident, Span, Symbol, kw, sym}; use thin_vec::ThinVec; use token_type::TokenTypeSet; pub use token_type::{ExpKeywordPair, ExpTokenPair, TokenType}; @@ -187,57 +188,6 @@ struct ClosureSpans { body: Span, } -/// A token range within a `Parser`'s full token stream. -#[derive(Clone, Debug)] -struct ParserRange(Range<u32>); - -/// A token range within an individual AST node's (lazy) token stream, i.e. -/// relative to that node's first token. Distinct from `ParserRange` so the two -/// kinds of range can't be mixed up. -#[derive(Clone, Debug)] -struct NodeRange(Range<u32>); - -/// Indicates a range of tokens that should be replaced by an `AttrsTarget` -/// (replacement) or be replaced by nothing (deletion). This is used in two -/// places during token collection. -/// -/// 1. Replacement. During the parsing of an AST node that may have a -/// `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]` -/// or `#[cfg_attr]`, we replace the entire inner AST node with -/// `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an -/// `AttrTokenStream`. -/// -/// 2. Deletion. We delete inner attributes from all collected token streams, -/// and instead track them through the `attrs` field on the AST node. This -/// lets us manipulate them similarly to outer attributes. When we create a -/// `TokenStream`, the inner attributes are inserted into the proper place -/// in the token stream. -/// -/// Each replacement starts off in `ParserReplacement` form but is converted to -/// `NodeReplacement` form when it is attached to a single AST node, via -/// `LazyAttrTokenStreamImpl`. -type ParserReplacement = (ParserRange, Option<AttrsTarget>); - -/// See the comment on `ParserReplacement`. -type NodeReplacement = (NodeRange, Option<AttrsTarget>); - -impl NodeRange { - // Converts a range within a parser's tokens to a range within a - // node's tokens beginning at `start_pos`. - // - // For example, imagine a parser with 50 tokens in its token stream, a - // function that spans `ParserRange(20..40)` and an inner attribute within - // that function that spans `ParserRange(30..35)`. We would find the inner - // attribute's range within the function's tokens by subtracting 20, which - // is the position of the function's start token. This gives - // `NodeRange(10..15)`. - fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange { - assert!(!parser_range.is_empty()); - assert!(parser_range.start >= start_pos); - NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos)) - } -} - /// Controls how we capture tokens. Capturing can be expensive, /// so we try to avoid performing capturing in cases where /// we will never need an `AttrTokenStream`. @@ -260,104 +210,6 @@ struct CaptureState { seen_attrs: IntervalSet<AttrId>, } -#[derive(Clone, Debug)] -struct TokenTreeCursor { - stream: TokenStream, - /// Points to the current token tree in the stream. In `TokenCursor::curr`, - /// this can be any token tree. In `TokenCursor::stack`, this is always a - /// `TokenTree::Delimited`. - index: usize, -} - -impl TokenTreeCursor { - #[inline] - fn new(stream: TokenStream) -> Self { - TokenTreeCursor { stream, index: 0 } - } - - #[inline] - fn curr(&self) -> Option<&TokenTree> { - self.stream.get(self.index) - } - - fn look_ahead(&self, n: usize) -> Option<&TokenTree> { - self.stream.get(self.index + n) - } - - #[inline] - fn bump(&mut self) { - self.index += 1; - } -} - -/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that -/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b) -/// use this type to emit them as a linear sequence. But a linear sequence is -/// what the parser expects, for the most part. -#[derive(Clone, Debug)] -struct TokenCursor { - // Cursor for the current (innermost) token stream. The index within the - // cursor can point to any token tree in the stream (or one past the end). - // The delimiters for this token stream are found in `self.stack.last()`; - // if that is `None` we are in the outermost token stream which never has - // delimiters. - curr: TokenTreeCursor, - - // Token streams surrounding the current one. The index within each cursor - // always points to a `TokenTree::Delimited`. - stack: Vec<TokenTreeCursor>, -} - -impl TokenCursor { - fn next(&mut self) -> (Token, Spacing) { - self.inlined_next() - } - - /// This always-inlined version should only be used on hot code paths. - #[inline(always)] - fn inlined_next(&mut self) -> (Token, Spacing) { - loop { - // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix - // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions - // below can be removed. - if let Some(tree) = self.curr.curr() { - match tree { - &TokenTree::Token(token, spacing) => { - debug_assert!(!token.kind.is_delim()); - let res = (token, spacing); - self.curr.bump(); - return res; - } - &TokenTree::Delimited(sp, spacing, delim, ref tts) => { - let trees = TokenTreeCursor::new(tts.clone()); - self.stack.push(mem::replace(&mut self.curr, trees)); - if !delim.skip() { - return (Token::new(delim.as_open_token_kind(), sp.open), spacing.open); - } - // No open delimiter to return; continue on to the next iteration. - } - }; - } else if let Some(parent) = self.stack.pop() { - // We have exhausted this token stream. Move back to its parent token stream. - let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else { - panic!("parent should be Delimited") - }; - self.curr = parent; - self.curr.bump(); // move past the `Delimited` - if !delim.skip() { - return (Token::new(delim.as_close_token_kind(), span.close), spacing.close); - } - // No close delimiter to return; continue on to the next iteration. - } else { - // We have exhausted the outermost token stream. The use of - // `Spacing::Alone` is arbitrary and immaterial, because the - // `Eof` token's spacing is never used. - return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); - } - } - } -} - /// A sequence separator. #[derive(Debug)] struct SeqSep<'a> { @@ -1742,26 +1594,6 @@ impl<'a> Parser<'a> { } } -/// A helper struct used when building an `AttrTokenStream` from -/// a `LazyAttrTokenStream`. Both delimiter and non-delimited tokens -/// are stored as `FlatToken::Token`. A vector of `FlatToken`s -/// is then 'parsed' to build up an `AttrTokenStream` with nested -/// `AttrTokenTree::Delimited` tokens. -#[derive(Debug, Clone)] -enum FlatToken { - /// A token - this holds both delimiter (e.g. '{' and '}') - /// and non-delimiter tokens - Token((Token, Spacing)), - /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted - /// directly into the constructed `AttrTokenStream` as an - /// `AttrTokenTree::AttrsTarget`. - AttrsTarget(AttrsTarget), - /// A special 'empty' token that is ignored during the conversion - /// to an `AttrTokenStream`. This is used to simplify the - /// handling of replace ranges. - Empty, -} - // Metavar captures of various kinds. #[derive(Clone, Debug)] pub enum ParseNtResult { |
