diff options
| author | Nicholas Nethercote <n.nethercote@gmail.com> | 2024-12-10 19:18:44 +1100 |
|---|---|---|
| committer | Nicholas Nethercote <n.nethercote@gmail.com> | 2024-12-18 12:50:22 +1100 |
| commit | 2903356b2e0d73089d66e24b61bfd47d22467851 (patch) | |
| tree | 08b5de54ccd98b8dacfe9e7ef9b046e0e4c62f13 /compiler/rustc_parse/src | |
| parent | fd83954d66411d0ecc0cdac9e78923c01749eb0f (diff) | |
| download | rust-2903356b2e0d73089d66e24b61bfd47d22467851.tar.gz rust-2903356b2e0d73089d66e24b61bfd47d22467851.zip | |
Overhaul `TokenTreeCursor`.
- Move it to `rustc_parse`, which is the only crate that uses it. This lets us remove all the `pub` markers from it. - Change `next_ref` and `look_ahead` to `get` and `bump`, which work better for the `rustc_parse` uses. - This requires adding a `TokenStream::get` method, which is simple. - In `TokenCursor`, we currently duplicate the `DelimSpan`/`DelimSpacing`/`Delimiter` from the surrounding `TokenTree::Delimited` in the stack. This isn't necessary so long as we don't prematurely move past the `Delimited`, and is a small perf win on a very hot code path. - In `parse_token_tree`, we clone the relevant `TokenTree::Delimited` instead of constructing an identical one from pieces.
Diffstat (limited to 'compiler/rustc_parse/src')
| -rw-r--r-- | compiler/rustc_parse/src/parser/expr.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 92 |
2 files changed, 63 insertions, 33 deletions
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index a2136399b0c..fe5df04deeb 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -8,6 +8,7 @@ use ast::token::IdentIsRaw; use ast::{CoroutineKind, ForLoopKind, GenBlockKind, MatchKind, Pat, Path, PathSegment, Recovered}; use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, Token, TokenKind}; +use rustc_ast::tokenstream::TokenTree; use rustc_ast::util::case::Case; use rustc_ast::util::classify; use rustc_ast::util::parser::{AssocOp, ExprPrecedence, Fixity, prec_let_scrutinee_needs_par}; @@ -2393,7 +2394,8 @@ impl<'a> Parser<'a> { } if self.token == TokenKind::Semi - && matches!(self.token_cursor.stack.last(), Some((.., Delimiter::Parenthesis))) + && let Some(last) = self.token_cursor.stack.last() + && let Some(TokenTree::Delimited(_, _, Delimiter::Parenthesis, _)) = last.curr() && self.may_recover() { // It is likely that the closure body is a block but where the diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index 976ffe608a2..b85968a555d 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -24,9 +24,7 @@ use rustc_ast::ptr::P; use rustc_ast::token::{ self, Delimiter, IdentIsRaw, InvisibleOrigin, MetaVarKind, Nonterminal, Token, TokenKind, }; -use rustc_ast::tokenstream::{ - AttrsTarget, DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree, TokenTreeCursor, -}; +use rustc_ast::tokenstream::{AttrsTarget, Spacing, TokenStream, TokenTree}; use rustc_ast::util::case::Case; use rustc_ast::{ self as ast, AnonConst, AttrArgs, AttrId, ByRef, Const, CoroutineKind, DUMMY_NODE_ID, @@ -273,21 +271,48 @@ struct CaptureState { seen_attrs: IntervalSet<AttrId>, } -/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that +#[derive(Clone, Debug)] +struct TokenTreeCursor { + stream: TokenStream, + /// Points to the current token tree in the stream. In `TokenCursor::curr`, + /// this can be any token tree. In `TokenCursor::stack`, this is always a + /// `TokenTree::Delimited`. + index: usize, +} + +impl TokenTreeCursor { + #[inline] + fn new(stream: TokenStream) -> Self { + TokenTreeCursor { stream, index: 0 } + } + + #[inline] + fn curr(&self) -> Option<&TokenTree> { + self.stream.get(self.index) + } + + #[inline] + fn bump(&mut self) { + self.index += 1; + } +} + +/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that /// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b) /// use this type to emit them as a linear sequence. But a linear sequence is /// what the parser expects, for the most part. #[derive(Clone, Debug)] struct TokenCursor { - // Cursor for the current (innermost) token stream. The delimiters for this - // token stream are found in `self.stack.last()`; when that is `None` then - // we are in the outermost token stream which never has delimiters. - tree_cursor: TokenTreeCursor, - - // Token streams surrounding the current one. The delimiters for stack[n]'s - // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters - // because it's the outermost token stream which never has delimiters. - stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>, + // Cursor for the current (innermost) token stream. The index within the + // cursor can point to any token tree in the stream (or one past the end). + // The delimiters for this token stream are found in `self.stack.last()`; + // if that is `None` we are in the outermost token stream which never has + // delimiters. + curr: TokenTreeCursor, + + // Token streams surrounding the current one. The index within each cursor + // always points to a `TokenTree::Delimited`. + stack: Vec<TokenTreeCursor>, } impl TokenCursor { @@ -302,32 +327,33 @@ impl TokenCursor { // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions // below can be removed. - if let Some(tree) = self.tree_cursor.next_ref() { + if let Some(tree) = self.curr.curr() { match tree { &TokenTree::Token(ref token, spacing) => { debug_assert!(!matches!( token.kind, token::OpenDelim(_) | token::CloseDelim(_) )); - return (token.clone(), spacing); + let res = (token.clone(), spacing); + self.curr.bump(); + return res; } &TokenTree::Delimited(sp, spacing, delim, ref tts) => { - let trees = tts.clone().into_trees(); - self.stack.push(( - mem::replace(&mut self.tree_cursor, trees), - sp, - spacing, - delim, - )); + let trees = TokenTreeCursor::new(tts.clone()); + self.stack.push(mem::replace(&mut self.curr, trees)); if !delim.skip() { return (Token::new(token::OpenDelim(delim), sp.open), spacing.open); } // No open delimiter to return; continue on to the next iteration. } }; - } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() { + } else if let Some(parent) = self.stack.pop() { // We have exhausted this token stream. Move back to its parent token stream. - self.tree_cursor = tree_cursor; + let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else { + panic!("parent should be Delimited") + }; + self.curr = parent; + self.curr.bump(); // move past the `Delimited` if !delim.skip() { return (Token::new(token::CloseDelim(delim), span.close), spacing.close); } @@ -466,7 +492,7 @@ impl<'a> Parser<'a> { capture_cfg: false, restrictions: Restrictions::empty(), expected_tokens: Vec::new(), - token_cursor: TokenCursor { tree_cursor: stream.into_trees(), stack: Vec::new() }, + token_cursor: TokenCursor { curr: TokenTreeCursor::new(stream), stack: Vec::new() }, num_bump_calls: 0, break_last_token: 0, unmatched_angle_bracket_count: 0, @@ -1192,7 +1218,7 @@ impl<'a> Parser<'a> { if dist == 1 { // The index is zero because the tree cursor's index always points // to the next token to be gotten. - match self.token_cursor.tree_cursor.look_ahead(0) { + match self.token_cursor.curr.curr() { Some(tree) => { // Indexing stayed within the current token tree. match tree { @@ -1202,12 +1228,13 @@ impl<'a> Parser<'a> { return looker(&Token::new(token::OpenDelim(delim), dspan.open)); } } - }; + } } None => { // The tree cursor lookahead went (one) past the end of the // current token tree. Try to return a close delimiter. - if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last() + if let Some(last) = self.token_cursor.stack.last() + && let Some(&TokenTree::Delimited(span, _, delim, _)) = last.curr() && !delim.skip() { // We are not in the outermost token stream, so we have @@ -1399,9 +1426,10 @@ impl<'a> Parser<'a> { pub fn parse_token_tree(&mut self) -> TokenTree { match self.token.kind { token::OpenDelim(..) => { - // Grab the tokens within the delimiters. - let stream = self.token_cursor.tree_cursor.stream.clone(); - let (_, span, spacing, delim) = *self.token_cursor.stack.last().unwrap(); + // Clone the `TokenTree::Delimited` that we are currently + // within. That's what we are going to return. + let tree = self.token_cursor.stack.last().unwrap().curr().unwrap().clone(); + debug_assert_matches!(tree, TokenTree::Delimited(..)); // Advance the token cursor through the entire delimited // sequence. After getting the `OpenDelim` we are *within* the @@ -1421,7 +1449,7 @@ impl<'a> Parser<'a> { // Consume close delimiter self.bump(); - TokenTree::Delimited(span, spacing, delim, stream) + tree } token::CloseDelim(_) | token::Eof => unreachable!(), _ => { |
