diff options
| author | Dylan DPC <99973273+Dylan-DPC@users.noreply.github.com> | 2023-02-03 23:04:51 +0530 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-02-03 23:04:51 +0530 |
| commit | 815dc9c48097b4b31dd7d7a90e2fa0188895dc62 (patch) | |
| tree | 09f441dd2590277d7e87ef4d414b91310d03453c /compiler/rustc_parse/src/parser | |
| parent | d9db35785d33e2b6c6e9b4971dfdbe0984a69b9e (diff) | |
| parent | a86fc727fa9b9fa1ac60b67147736783b3376e91 (diff) | |
| download | rust-815dc9c48097b4b31dd7d7a90e2fa0188895dc62.tar.gz rust-815dc9c48097b4b31dd7d7a90e2fa0188895dc62.zip | |
Rollup merge of #107544 - nnethercote:improve-TokenCursor, r=petrochenkov
Improve `TokenCursor`. Some small improvements, for things that were bugging me. Best reviewed one commit at a time. r? ``@petrochenkov``
Diffstat (limited to 'compiler/rustc_parse/src/parser')
| -rw-r--r-- | compiler/rustc_parse/src/parser/attr_wrapper.rs | 2 |
| -rw-r--r-- | compiler/rustc_parse/src/parser/expr.rs | 2 |
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 123 |
3 files changed, 61 insertions, 66 deletions
diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs index b97f22417cb..dbd3b76786f 100644 --- a/compiler/rustc_parse/src/parser/attr_wrapper.rs +++ b/compiler/rustc_parse/src/parser/attr_wrapper.rs @@ -469,6 +469,6 @@ mod size_asserts { use rustc_data_structures::static_assert_size; // tidy-alphabetical-start static_assert_size!(AttrWrapper, 16); - static_assert_size!(LazyAttrTokenStreamImpl, 144); + static_assert_size!(LazyAttrTokenStreamImpl, 120); // tidy-alphabetical-end } diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index 3d42a9dcbbe..8a6436b041b 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -2141,7 +2141,7 @@ impl<'a> Parser<'a> { } if self.token.kind == TokenKind::Semi - && matches!(self.token_cursor.frame.delim_sp, Some((Delimiter::Parenthesis, _))) + && matches!(self.token_cursor.stack.last(), Some((_, Delimiter::Parenthesis, _))) && self.may_recover() { // It is likely that the closure body is a block but where the diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index ffb23b50a16..2ea55f838a3 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -19,9 +19,8 @@ pub use path::PathStyle; use rustc_ast::ptr::P; use rustc_ast::token::{self, Delimiter, Nonterminal, Token, TokenKind}; -use rustc_ast::tokenstream::AttributesData; -use rustc_ast::tokenstream::{self, DelimSpan, Spacing}; -use rustc_ast::tokenstream::{TokenStream, TokenTree}; +use rustc_ast::tokenstream::{AttributesData, DelimSpan, Spacing}; +use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor}; use rustc_ast::util::case::Case; use rustc_ast::AttrId; use rustc_ast::DUMMY_NODE_ID; @@ -168,7 +167,7 @@ pub struct Parser<'a> { // This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. 
Make sure // it doesn't unintentionally get bigger. #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] -rustc_data_structures::static_assert_size!(Parser<'_>, 336); +rustc_data_structures::static_assert_size!(Parser<'_>, 312); /// Stores span information about a closure. #[derive(Clone)] @@ -221,18 +220,27 @@ impl<'a> Drop for Parser<'a> { } } +/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that +/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b) +/// use this type to emit them as a linear sequence. But a linear sequence is +/// what the parser expects, for the most part. #[derive(Clone)] struct TokenCursor { - // The current (innermost) frame. `frame` and `stack` could be combined, - // but it's faster to have them separately to access `frame` directly - // rather than via something like `stack.last().unwrap()` or - // `stack[stack.len() - 1]`. - frame: TokenCursorFrame, - // Additional frames that enclose `frame`. - stack: Vec<TokenCursorFrame>, + // Cursor for the current (innermost) token stream. The delimiters for this + // token stream are found in `self.stack.last()`; when that is `None` then + // we are in the outermost token stream which never has delimiters. + tree_cursor: TokenTreeCursor, + + // Token streams surrounding the current one. The delimiters for stack[n]'s + // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters + // because it's the outermost token stream which never has delimiters. + stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>, + desugar_doc_comments: bool, + // Counts the number of calls to `{,inlined_}next`. num_next_calls: usize, + // During parsing, we may sometimes need to 'unglue' a // glued token into two component tokens // (e.g. 
'>>' into '>' and '>), so that the parser @@ -257,18 +265,6 @@ struct TokenCursor { break_last_token: bool, } -#[derive(Clone)] -struct TokenCursorFrame { - delim_sp: Option<(Delimiter, DelimSpan)>, - tree_cursor: tokenstream::Cursor, -} - -impl TokenCursorFrame { - fn new(delim_sp: Option<(Delimiter, DelimSpan)>, tts: TokenStream) -> Self { - TokenCursorFrame { delim_sp, tree_cursor: tts.into_trees() } - } -} - impl TokenCursor { fn next(&mut self, desugar_doc_comments: bool) -> (Token, Spacing) { self.inlined_next(desugar_doc_comments) @@ -281,38 +277,47 @@ impl TokenCursor { // FIXME: we currently don't return `Delimiter` open/close delims. To fix #67062 we will // need to, whereupon the `delim != Delimiter::Invisible` conditions below can be // removed. - if let Some(tree) = self.frame.tree_cursor.next_ref() { + if let Some(tree) = self.tree_cursor.next_ref() { match tree { &TokenTree::Token(ref token, spacing) => match (desugar_doc_comments, token) { (true, &Token { kind: token::DocComment(_, attr_style, data), span }) => { - return self.desugar(attr_style, data, span); + let desugared = self.desugar(attr_style, data, span); + self.tree_cursor.replace_prev_and_rewind(desugared); + // Continue to get the first token of the desugared doc comment. + } + _ => { + debug_assert!(!matches!( + token.kind, + token::OpenDelim(_) | token::CloseDelim(_) + )); + return (token.clone(), spacing); } - _ => return (token.clone(), spacing), }, &TokenTree::Delimited(sp, delim, ref tts) => { - // Set `open_delim` to true here because we deal with it immediately. - let frame = TokenCursorFrame::new(Some((delim, sp)), tts.clone()); - self.stack.push(mem::replace(&mut self.frame, frame)); + let trees = tts.clone().into_trees(); + self.stack.push((mem::replace(&mut self.tree_cursor, trees), delim, sp)); if delim != Delimiter::Invisible { return (Token::new(token::OpenDelim(delim), sp.open), Spacing::Alone); } // No open delimiter to return; continue on to the next iteration. 
} }; - } else if let Some(frame) = self.stack.pop() { - if let Some((delim, span)) = self.frame.delim_sp && delim != Delimiter::Invisible { - self.frame = frame; + } else if let Some((tree_cursor, delim, span)) = self.stack.pop() { + // We have exhausted this token stream. Move back to its parent token stream. + self.tree_cursor = tree_cursor; + if delim != Delimiter::Invisible { return (Token::new(token::CloseDelim(delim), span.close), Spacing::Alone); } - self.frame = frame; // No close delimiter to return; continue on to the next iteration. } else { + // We have exhausted the outermost token stream. return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone); } } } - fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> (Token, Spacing) { + // Desugar a doc comment into something like `#[doc = r"foo"]`. + fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> { // Searches for the occurrences of `"#*` and returns the minimum number of `#`s // required to wrap the text. E.g. 
// - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0) @@ -346,27 +351,15 @@ impl TokenCursor { .collect::<TokenStream>(), ); - self.stack.push(mem::replace( - &mut self.frame, - TokenCursorFrame::new( - None, - if attr_style == AttrStyle::Inner { - [ - TokenTree::token_alone(token::Pound, span), - TokenTree::token_alone(token::Not, span), - body, - ] - .into_iter() - .collect::<TokenStream>() - } else { - [TokenTree::token_alone(token::Pound, span), body] - .into_iter() - .collect::<TokenStream>() - }, - ), - )); - - self.next(/* desugar_doc_comments */ false) + if attr_style == AttrStyle::Inner { + vec![ + TokenTree::token_alone(token::Pound, span), + TokenTree::token_alone(token::Not, span), + body, + ] + } else { + vec![TokenTree::token_alone(token::Pound, span), body] + } } } @@ -475,7 +468,7 @@ impl<'a> Parser<'a> { restrictions: Restrictions::empty(), expected_tokens: Vec::new(), token_cursor: TokenCursor { - frame: TokenCursorFrame::new(None, tokens), + tree_cursor: tokens.into_trees(), stack: Vec::new(), num_next_calls: 0, desugar_doc_comments, @@ -1142,14 +1135,16 @@ impl<'a> Parser<'a> { return looker(&self.token); } - let frame = &self.token_cursor.frame; - if let Some((delim, span)) = frame.delim_sp && delim != Delimiter::Invisible { + let tree_cursor = &self.token_cursor.tree_cursor; + if let Some(&(_, delim, span)) = self.token_cursor.stack.last() + && delim != Delimiter::Invisible + { let all_normal = (0..dist).all(|i| { - let token = frame.tree_cursor.look_ahead(i); + let token = tree_cursor.look_ahead(i); !matches!(token, Some(TokenTree::Delimited(_, Delimiter::Invisible, _))) }); if all_normal { - return match frame.tree_cursor.look_ahead(dist - 1) { + return match tree_cursor.look_ahead(dist - 1) { Some(tree) => match tree { TokenTree::Token(token, _) => looker(token), TokenTree::Delimited(dspan, delim, _) => { @@ -1310,10 +1305,10 @@ impl<'a> Parser<'a> { pub(crate) fn parse_token_tree(&mut self) -> TokenTree { match self.token.kind { 
token::OpenDelim(..) => { - // Grab the tokens from this frame. - let frame = &self.token_cursor.frame; - let stream = frame.tree_cursor.stream.clone(); - let (delim, span) = frame.delim_sp.unwrap(); + // Grab the tokens within the delimiters. + let tree_cursor = &self.token_cursor.tree_cursor; + let stream = tree_cursor.stream.clone(); + let (_, delim, span) = *self.token_cursor.stack.last().unwrap(); // Advance the token cursor through the entire delimited // sequence. After getting the `OpenDelim` we are *within* the |
