diff options
| author | Aaron Hill <aa1ronham@gmail.com> | 2020-12-12 15:20:22 -0500 |
|---|---|---|
| committer | Aaron Hill <aa1ronham@gmail.com> | 2020-12-12 16:28:13 -0500 |
| commit | e6fa6334dd54f7c96514b520c5b9f261df3fc16b (patch) | |
| tree | 8561fd33a725cedad047218ea2741deaff324a04 /compiler/rustc_parse/src/parser/mod.rs | |
| parent | 388eb24b6c479088a83c1b094d79221a32dfc7ff (diff) | |
| download | rust-e6fa6334dd54f7c96514b520c5b9f261df3fc16b.tar.gz rust-e6fa6334dd54f7c96514b520c5b9f261df3fc16b.zip | |
Properly capture trailing 'unglued' token
If we try to capture the `Vec<u8>` in `Option<Vec<u8>>`, we'll need to capture a `>` token which was 'unglued' from a `>>` token. The processing of unglueing a token for parsing purposes bypasses the usual capturing infrastructure, so we currently lose the trailing `>`. As a result, we fall back to the reparsed `TokenStream`, causing us to lose spans. This commit makes token capturing keep track of a trailing 'unglued' token. Note that we don't need to care about unglueing except at the end of the captured tokens - if we capture both the first and second unglued tokens, then we'll end up capturing the full 'glued' token, which already works correctly.
Diffstat (limited to 'compiler/rustc_parse/src/parser/mod.rs')
| -rw-r--r-- | compiler/rustc_parse/src/parser/mod.rs | 67 |
1 files changed, 58 insertions, 9 deletions
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs index df4695b18e7..d51a0fcbf09 100644 --- a/compiler/rustc_parse/src/parser/mod.rs +++ b/compiler/rustc_parse/src/parser/mod.rs @@ -17,7 +17,7 @@ pub use path::PathStyle; use rustc_ast::ptr::P; use rustc_ast::token::{self, DelimToken, Token, TokenKind}; use rustc_ast::tokenstream::{self, DelimSpan, LazyTokenStream, Spacing}; -use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree}; +use rustc_ast::tokenstream::{CreateTokenStream, TokenStream, TokenTree, TreeAndSpacing}; use rustc_ast::DUMMY_NODE_ID; use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, CrateSugar, Extern, Unsafe}; use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit}; @@ -132,6 +132,28 @@ struct TokenCursor { // Counts the number of calls to `next` or `next_desugared`, // depending on whether `desugar_doc_comments` is set. num_next_calls: usize, + // During parsing, we may sometimes need to 'unglue' a + // glued token into two component tokens + // (e.g. '>>' into '>' and '>), so that the parser + // can consume them one at a time. This process + // bypasses the normal capturing mechanism + // (e.g. `num_next_calls` will not be incremented), + // since the 'unglued' tokens due not exist in + // the original `TokenStream`. + // + // If we end up consuming both unglued tokens, + // then this is not an issue - we'll end up + // capturing the single 'glued' token. + // + // However, in certain circumstances, we may + // want to capture just the first 'unglued' token. + // For example, capturing the `Vec<u8>` + // in `Option<Vec<u8>>` requires us to unglue + // the trailing `>>` token. The `append_unglued_token` + // field is used to track this token - it gets + // appended to the captured stream when + // we evaluate a `LazyTokenStream` + append_unglued_token: Option<TreeAndSpacing>, } #[derive(Clone)] @@ -336,6 +358,7 @@ impl<'a> Parser<'a> { stack: Vec::new(), num_next_calls: 0, desugar_doc_comments, + append_unglued_token: None, }, desugar_doc_comments, unmatched_angle_bracket_count: 0, @@ -359,6 +382,10 @@ impl<'a> Parser<'a> { self.token_cursor.next() }; self.token_cursor.num_next_calls += 1; + // We've retrieved an token from the underlying + // cursor, so we no longer need to worry about + // an unglued token. See `break_and_eat` for more details + self.token_cursor.append_unglued_token = None; if next.span.is_dummy() { // Tweak the location for better diagnostics, but keep syntactic context intact. next.span = fallback_span.with_ctxt(next.span.ctxt()); @@ -555,6 +582,14 @@ impl<'a> Parser<'a> { let first_span = self.sess.source_map().start_point(self.token.span); let second_span = self.token.span.with_lo(first_span.hi()); self.token = Token::new(first, first_span); + // Keep track of this token - if we end token capturing now, + // we'll want to append this token to the captured stream. + // + // If we consume any additional tokens, then this token + // is not needed (we'll capture the entire 'glued' token), + // and `next_tok` will set this field to `None` + self.token_cursor.append_unglued_token = + Some((TokenTree::Token(self.token.clone()), Spacing::Alone)); // Use the spacing of the glued token as the spacing // of the unglued second token. self.bump_with((Token::new(second, second_span), self.token_spacing)); @@ -1230,6 +1265,7 @@ impl<'a> Parser<'a> { num_calls: usize, desugar_doc_comments: bool, trailing_semi: bool, + append_unglued_token: Option<TreeAndSpacing>, } impl CreateTokenStream for LazyTokenStreamImpl { fn create_token_stream(&self) -> TokenStream { @@ -1253,12 +1289,18 @@ impl<'a> Parser<'a> { })) .take(num_calls); - make_token_stream(tokens) + make_token_stream(tokens, self.append_unglued_token.clone()) } fn add_trailing_semi(&self) -> Box<dyn CreateTokenStream> { if self.trailing_semi { panic!("Called `add_trailing_semi` twice!"); } + if self.append_unglued_token.is_some() { + panic!( + "Cannot call `add_trailing_semi` when we have an unglued token {:?}", + self.append_unglued_token + ); + } let mut new = self.clone(); new.trailing_semi = true; Box::new(new) @@ -1271,6 +1313,7 @@ impl<'a> Parser<'a> { cursor_snapshot, desugar_doc_comments: self.desugar_doc_comments, trailing_semi: false, + append_unglued_token: self.token_cursor.append_unglued_token.clone(), }; Ok((ret, Some(LazyTokenStream::new(lazy_impl)))) } @@ -1325,7 +1368,10 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa /// Converts a flattened iterator of tokens (including open and close delimiter tokens) /// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair /// of open and close delims. -fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStream { +fn make_token_stream( + tokens: impl Iterator<Item = (Token, Spacing)>, + append_unglued_token: Option<TreeAndSpacing>, +) -> TokenStream { #[derive(Debug)] struct FrameData { open: Span, @@ -1348,14 +1394,17 @@ fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStr .inner .push((delimited, Spacing::Alone)); } - token => stack - .last_mut() - .expect("Bottom token frame is missing!") - .inner - .push((TokenTree::Token(token), spacing)), + token => { + stack + .last_mut() + .expect("Bottom token frame is missing!") + .inner + .push((TokenTree::Token(token), spacing)); + } } } - let final_buf = stack.pop().expect("Missing final buf!"); + let mut final_buf = stack.pop().expect("Missing final buf!"); + final_buf.inner.extend(append_unglued_token); assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack); TokenStream::new(final_buf.inner) } |
