Move doc comment desugaring out of `TokenCursor`.

`TokenCursor` currently does doc comment desugaring on the fly, if the `desugar_doc_comment` field is set. This requires also modifying the token stream on the fly with `replace_prev_and_rewind`. This commit moves the doc comment desugaring out of `TokenCursor`, by introducing a new `TokenStream::desugar_doc_comment` method. This separation of desugaring and iterating makes the code nicer.
author: Nicholas Nethercote <n.nethercote@gmail.com> 2023-07-30 17:16:20 +1000
committer: Nicholas Nethercote <n.nethercote@gmail.com> 2023-07-31 14:47:03 +1000
commit: ff7d5ba65e94980fbfb13793ef5d503ffad559b0 (patch)
tree: 5cfec5ec4eb139687a13d8239312b90f3a533d08 /compiler/rustc_parse/src
parent: c70c8b7196539b2b0356abd7f683921a05f137e7 (diff)
download: rust-ff7d5ba65e94980fbfb13793ef5d503ffad559b0.tar.gz
rust-ff7d5ba65e94980fbfb13793ef5d503ffad559b0.zip
1 files changed, 13 insertions, 73 deletions
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 1d3489aba1b..7d39bf8edf3 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -24,7 +24,7 @@ use rustc_ast::tokenstream::{TokenStream, TokenTree, TokenTreeCursor};
 use rustc_ast::util::case::Case;
 use rustc_ast::AttrId;
 use rustc_ast::DUMMY_NODE_ID;
-use rustc_ast::{self as ast, AnonConst, AttrStyle, Const, DelimArgs, Extern};
+use rustc_ast::{self as ast, AnonConst, Const, DelimArgs, Extern};
 use rustc_ast::{Async, AttrArgs, AttrArgsEq, Expr, ExprKind, MacDelimiter, Mutability, StrLit};
 use rustc_ast::{HasAttrs, HasTokens, Unsafe, Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
@@ -38,7 +38,7 @@ use rustc_session::parse::ParseSess;
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use std::ops::Range;
-use std::{cmp, mem, slice};
+use std::{mem, slice};
 use thin_vec::ThinVec;
 use tracing::debug;
 
@@ -224,11 +224,6 @@ struct TokenCursor {
     // because it's the outermost token stream which never has delimiters.
     stack: Vec<(TokenTreeCursor, Delimiter, DelimSpan)>,
 
-    // We need to desugar doc comments from `/// foo` form into `#[doc =
-    // r"foo"]` form when parsing declarative macro inputs in `parse_tt`,
-    // because some declarative macros look for `doc` attributes.
-    desugar_doc_comments: bool,
-
     // Counts the number of calls to `{,inlined_}next`.
     num_next_calls: usize,
 
@@ -271,23 +266,11 @@ impl TokenCursor {
             if let Some(tree) = self.tree_cursor.next_ref() {
                 match tree {
                     &TokenTree::Token(ref token, spacing) => {
-                        match (self.desugar_doc_comments, token) {
-                            (
-                                true,
-                                &Token { kind: token::DocComment(_, attr_style, data), span },
-                            ) => {
-                                let desugared = self.desugar(attr_style, data, span);
-                                self.tree_cursor.replace_prev_and_rewind(desugared);
-                                // Continue to get the first token of the desugared doc comment.
-                            }
-                            _ => {
-                                debug_assert!(!matches!(
-                                    token.kind,
-                                    token::OpenDelim(_) | token::CloseDelim(_)
-                                ));
-                                return (token.clone(), spacing);
-                            }
-                        }
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
                     }
                     &TokenTree::Delimited(sp, delim, ref tts) => {
                         let trees = tts.clone().into_trees();
@@ -311,52 +294,6 @@ impl TokenCursor {
             }
         }
     }
-
-    // Desugar a doc comment into something like `#[doc = r"foo"]`.
-    fn desugar(&mut self, attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
-        // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
-        // required to wrap the text. E.g.
-        // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
-        // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
-        // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
-        let mut num_of_hashes = 0;
-        let mut count = 0;
-        for ch in data.as_str().chars() {
-            count = match ch {
-                '"' => 1,
-                '#' if count > 0 => count + 1,
-                _ => 0,
-            };
-            num_of_hashes = cmp::max(num_of_hashes, count);
-        }
-
-        // `/// foo` becomes `doc = r"foo"`.
-        let delim_span = DelimSpan::from_single(span);
-        let body = TokenTree::Delimited(
-            delim_span,
-            Delimiter::Bracket,
-            [
-                TokenTree::token_alone(token::Ident(sym::doc, false), span),
-                TokenTree::token_alone(token::Eq, span),
-                TokenTree::token_alone(
-                    TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
-                    span,
-                ),
-            ]
-            .into_iter()
-            .collect::<TokenStream>(),
-        );
-
-        if attr_style == AttrStyle::Inner {
-            vec![
-                TokenTree::token_alone(token::Pound, span),
-                TokenTree::token_alone(token::Not, span),
-                body,
-            ]
-        } else {
-            vec![TokenTree::token_alone(token::Pound, span), body]
-        }
-    }
 }
 
 #[derive(Debug, Clone, PartialEq)]
@@ -451,10 +388,14 @@ pub(super) fn token_descr(token: &Token) -> String {
 impl<'a> Parser<'a> {
     pub fn new(
         sess: &'a ParseSess,
-        tokens: TokenStream,
+        mut stream: TokenStream,
         desugar_doc_comments: bool,
         subparser_name: Option<&'static str>,
     ) -> Self {
+        if desugar_doc_comments {
+            stream.desugar_doc_comments();
+        }
+
         let mut parser = Parser {
             sess,
             token: Token::dummy(),
@@ -464,10 +405,9 @@ impl<'a> Parser<'a> {
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor {
-                tree_cursor: tokens.into_trees(),
+                tree_cursor: stream.into_trees(),
                 stack: Vec::new(),
                 num_next_calls: 0,
-                desugar_doc_comments,
                 break_last_token: false,
             },
             unmatched_angle_bracket_count: 0,
author	Nicholas Nethercote <n.nethercote@gmail.com>	2023-07-30 17:16:20 +1000
committer	Nicholas Nethercote <n.nethercote@gmail.com>	2023-07-31 14:47:03 +1000
commit	ff7d5ba65e94980fbfb13793ef5d503ffad559b0 (patch)
tree	5cfec5ec4eb139687a13d8239312b90f3a533d08 /compiler/rustc_parse/src
parent	c70c8b7196539b2b0356abd7f683921a05f137e7 (diff)
download	rust-ff7d5ba65e94980fbfb13793ef5d503ffad559b0.tar.gz rust-ff7d5ba65e94980fbfb13793ef5d503ffad559b0.zip