Rewrite `macro_rules!` parser to not use the MBE engine itself

The `macro_rules!` parser was written to match the series of rules using the macros-by-example (MBE) engine and a hand-written equivalent of the left-hand side of a MBE macro. This was complex to read, difficult to extend, and produced confusing error messages. Because it was using the MBE engine, any parse failure would be reported as if some macro was being applied to the `macro_rules!` invocation itself; for instance, errors would talk about "macro invocation", "macro arguments", and "macro call", when they were actually about the macro *definition*. And in practice, the `macro_rules!` parser only used the MBE engine to extract the left-hand side and right-hand side of each rule as a token tree, and then parsed the rest using a separate parser. Rewrite it to parse the series of rules using a simple loop, instead. This makes it more extensible in the future, and improves error messages. For instance, omitting a semicolon between rules will result in "expected `;`" and "unexpected token", rather than the confusing "no rules expected this token in macro call". This work was greatly aided by pair programming with Vincenzo Palazzo and Eric Holk.
author: Josh Triplett <josh@joshtriplett.org> 2025-06-26 14:19:15 -0700
committer: Josh Triplett <josh@joshtriplett.org> 2025-06-26 15:20:42 -0700
commit: 6c04e0a7aeeee15fc11759dd563aecfe6df194a0 (patch)
tree: 004b9b39b34533d0f891808d4bd92b9dd06243d7 /compiler/rustc_expand/src
parent: b03b3a7ec92682be2917540b679478d41c95a30c (diff)
download: rust-6c04e0a7aeeee15fc11759dd563aecfe6df194a0.tar.gz
rust-6c04e0a7aeeee15fc11759dd563aecfe6df194a0.zip
3 files changed, 53 insertions, 188 deletions
diff --git a/compiler/rustc_expand/src/mbe/diagnostics.rs b/compiler/rustc_expand/src/mbe/diagnostics.rs
index 99aa376626d..c607a3a3652 100644
--- a/compiler/rustc_expand/src/mbe/diagnostics.rs
+++ b/compiler/rustc_expand/src/mbe/diagnostics.rs
@@ -195,38 +195,6 @@ impl<'dcx> CollectTrackerAndEmitter<'dcx, '_> {
     }
 }
 
-/// Currently used by macro_rules! compilation to extract a little information from the `Failure`
-/// case.
-pub(crate) struct FailureForwarder<'matcher> {
-    expected_token: Option<&'matcher Token>,
-}
-
-impl<'matcher> FailureForwarder<'matcher> {
-    pub(crate) fn new() -> Self {
-        Self { expected_token: None }
-    }
-}
-
-impl<'matcher> Tracker<'matcher> for FailureForwarder<'matcher> {
-    type Failure = (Token, u32, &'static str);
-
-    fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure {
-        (tok, position, msg)
-    }
-
-    fn description() -> &'static str {
-        "failure-forwarder"
-    }
-
-    fn set_expected_token(&mut self, tok: &'matcher Token) {
-        self.expected_token = Some(tok);
-    }
-
-    fn get_expected_token(&self) -> Option<&'matcher Token> {
-        self.expected_token
-    }
-}
-
 pub(super) fn emit_frag_parse_err(
     mut e: Diag<'_>,
     parser: &Parser<'_>,
@@ -321,7 +289,7 @@ enum ExplainDocComment {
     },
 }
 
-pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) {
+fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) {
     if let Ok(src) = sm.span_to_snippet(span) {
         if src.starts_with("///") || src.starts_with("/**") {
             err.subdiagnostic(ExplainDocComment::Outer { span });
@@ -333,7 +301,7 @@ pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Spa
 
 /// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
 /// other tokens, this is "unexpected token...".
-pub(super) fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> {
+fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> {
     if let Some(expected_token) = expected_token {
         Cow::from(format!("expected {}, found {}", token_descr(expected_token), token_descr(tok)))
     } else {
diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
index 802e43209a5..3f1fc841ea3 100644
--- a/compiler/rustc_expand/src/mbe/macro_parser.rs
+++ b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -536,8 +536,6 @@ impl TtParser {
                         // The separator matches the current token. Advance past it.
                         mp.idx += 1;
                         self.next_mps.push(mp);
-                    } else {
-                        track.set_expected_token(separator);
                     }
                 }
                 &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {
diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs
index 432ab324740..234e0257530 100644
--- a/compiler/rustc_expand/src/mbe/macro_rules.rs
+++ b/compiler/rustc_expand/src/mbe/macro_rules.rs
@@ -19,12 +19,13 @@ use rustc_lint_defs::BuiltinLintDiag;
 use rustc_lint_defs::builtin::{
     RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
 };
-use rustc_parse::parser::{ParseNtResult, Parser, Recovery};
+use rustc_parse::exp;
+use rustc_parse::parser::{Parser, Recovery};
 use rustc_session::Session;
 use rustc_session::parse::ParseSess;
 use rustc_span::edition::Edition;
 use rustc_span::hygiene::Transparency;
-use rustc_span::{Ident, MacroRulesNormalizedIdent, Span, kw, sym};
+use rustc_span::{Ident, Span, kw, sym};
 use tracing::{debug, instrument, trace, trace_span};
 
 use super::macro_parser::{NamedMatches, NamedParseResult};
@@ -34,8 +35,6 @@ use crate::base::{
     SyntaxExtensionKind, TTMacroExpander,
 };
 use crate::expand::{AstFragment, AstFragmentKind, ensure_complete_parse, parse_ast_fragment};
-use crate::mbe::diagnostics::{annotate_doc_comment, parse_failure_msg};
-use crate::mbe::macro_parser::NamedMatch::*;
 use crate::mbe::macro_parser::{Error, ErrorReported, Failure, MatcherLoc, Success, TtParser};
 use crate::mbe::transcribe::transcribe;
 use crate::mbe::{self, KleeneOp, macro_check};
@@ -168,11 +167,6 @@ pub(super) trait Tracker<'matcher> {
     fn recovery() -> Recovery {
         Recovery::Forbidden
     }
-
-    fn set_expected_token(&mut self, _tok: &'matcher Token) {}
-    fn get_expected_token(&self) -> Option<&'matcher Token> {
-        None
-    }
 }
 
 /// A noop tracker that is used in the hot path of the expansion, has zero overhead thanks to
@@ -360,11 +354,6 @@ pub(super) fn try_match_macro<'matcher, T: Tracker<'matcher>>(
     Err(CanRetry::Yes)
 }
 
-// Note that macro-by-example's input is also matched against a token tree:
-//                   $( $lhs:tt => $rhs:tt );+
-//
-// Holy self-referential!
-
 /// Converts a macro item into a syntax extension.
 pub fn compile_declarative_macro(
     sess: &Session,
@@ -390,153 +379,63 @@ pub fn compile_declarative_macro(
     };
     let dummy_syn_ext = |guar| (mk_syn_ext(Arc::new(DummyExpander(guar))), Vec::new());
 
-    let lhs_nm = Ident::new(sym::lhs, span);
-    let rhs_nm = Ident::new(sym::rhs, span);
-    let tt_spec = NonterminalKind::TT;
     let macro_rules = macro_def.macro_rules;
+    let exp_sep = if macro_rules { exp!(Semi) } else { exp!(Comma) };
 
-    // Parse the macro_rules! invocation
-
-    // The pattern that macro_rules matches.
-    // The grammar for macro_rules! is:
-    // $( $lhs:tt => $rhs:tt );+
-    // ...quasiquoting this would be nice.
-    // These spans won't matter, anyways
-    let argument_gram = vec![
-        mbe::TokenTree::Sequence(
-            DelimSpan::dummy(),
-            mbe::SequenceRepetition {
-                tts: vec![
-                    mbe::TokenTree::MetaVarDecl { span, name: lhs_nm, kind: tt_spec },
-                    mbe::TokenTree::token(token::FatArrow, span),
-                    mbe::TokenTree::MetaVarDecl { span, name: rhs_nm, kind: tt_spec },
-                ],
-                separator: Some(Token::new(
-                    if macro_rules { token::Semi } else { token::Comma },
-                    span,
-                )),
-                kleene: mbe::KleeneToken::new(mbe::KleeneOp::OneOrMore, span),
-                num_captures: 2,
-            },
-        ),
-        // to phase into semicolon-termination instead of semicolon-separation
-        mbe::TokenTree::Sequence(
-            DelimSpan::dummy(),
-            mbe::SequenceRepetition {
-                tts: vec![mbe::TokenTree::token(
-                    if macro_rules { token::Semi } else { token::Comma },
-                    span,
-                )],
-                separator: None,
-                kleene: mbe::KleeneToken::new(mbe::KleeneOp::ZeroOrMore, span),
-                num_captures: 0,
-            },
-        ),
-    ];
-    // Convert it into `MatcherLoc` form.
-    let argument_gram = mbe::macro_parser::compute_locs(&argument_gram);
-
-    let create_parser = || {
-        let body = macro_def.body.tokens.clone();
-        Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS)
-    };
-
-    let parser = create_parser();
-    let mut tt_parser =
-        TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro }));
-    let argument_map =
-        match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
-            Success(m) => m,
-            Failure(()) => {
-                debug!("failed to parse macro tt");
-                // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it
-                // with another one that gives us the information we need.
-                // For this we need to reclone the macro body as the previous parser consumed it.
-                let retry_parser = create_parser();
-
-                let mut track = diagnostics::FailureForwarder::new();
-                let parse_result =
-                    tt_parser.parse_tt(&mut Cow::Owned(retry_parser), &argument_gram, &mut track);
-                let Failure((token, _, msg)) = parse_result else {
-                    unreachable!("matcher returned something other than Failure after retry");
-                };
-
-                let s = parse_failure_msg(&token, track.get_expected_token());
-                let sp = token.span.substitute_dummy(span);
-                let mut err = sess.dcx().struct_span_err(sp, s);
-                err.span_label(sp, msg);
-                annotate_doc_comment(&mut err, sess.source_map(), sp);
-                let guar = err.emit();
-                return dummy_syn_ext(guar);
-            }
-            Error(sp, msg) => {
-                let guar = sess.dcx().span_err(sp.substitute_dummy(span), msg);
-                return dummy_syn_ext(guar);
-            }
-            ErrorReported(guar) => {
-                return dummy_syn_ext(guar);
-            }
-        };
+    let body = macro_def.body.tokens.clone();
+    let mut p = Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS);
 
+    // Don't abort iteration early, so that multiple errors can be reported.
     let mut guar = None;
     let mut check_emission = |ret: Result<(), ErrorGuaranteed>| guar = guar.or(ret.err());
 
-    // Extract the arguments:
-    let lhses = match &argument_map[&MacroRulesNormalizedIdent::new(lhs_nm)] {
-        MatchedSeq(s) => s
-            .iter()
-            .map(|m| {
-                if let MatchedSingle(ParseNtResult::Tt(tt)) = m {
-                    let tt = mbe::quoted::parse(
-                        &TokenStream::new(vec![tt.clone()]),
-                        true,
-                        sess,
-                        node_id,
-                        features,
-                        edition,
-                    )
-                    .pop()
-                    .unwrap();
-                    // We don't handle errors here, the driver will abort
-                    // after parsing/expansion. We can report every error in every macro this way.
-                    check_emission(check_lhs_nt_follows(sess, node_id, &tt));
-                    return tt;
-                }
-                sess.dcx().span_bug(span, "wrong-structured lhs")
-            })
-            .collect::<Vec<mbe::TokenTree>>(),
-        _ => sess.dcx().span_bug(span, "wrong-structured lhs"),
-    };
-
-    let rhses = match &argument_map[&MacroRulesNormalizedIdent::new(rhs_nm)] {
-        MatchedSeq(s) => s
-            .iter()
-            .map(|m| {
-                if let MatchedSingle(ParseNtResult::Tt(tt)) = m {
-                    return mbe::quoted::parse(
-                        &TokenStream::new(vec![tt.clone()]),
-                        false,
-                        sess,
-                        node_id,
-                        features,
-                        edition,
-                    )
-                    .pop()
-                    .unwrap();
-                }
-                sess.dcx().span_bug(span, "wrong-structured rhs")
-            })
-            .collect::<Vec<mbe::TokenTree>>(),
-        _ => sess.dcx().span_bug(span, "wrong-structured rhs"),
-    };
+    let mut lhses = Vec::new();
+    let mut rhses = Vec::new();
 
-    for rhs in &rhses {
-        check_emission(check_rhs(sess, rhs));
+    while p.token != token::Eof {
+        let lhs_tt = p.parse_token_tree();
+        let lhs_tt = mbe::quoted::parse(
+            &TokenStream::new(vec![lhs_tt]),
+            true, // LHS
+            sess,
+            node_id,
+            features,
+            edition,
+        )
+        .pop()
+        .unwrap();
+        // We don't handle errors here, the driver will abort after parsing/expansion. We can
+        // report every error in every macro this way.
+        check_emission(check_lhs_nt_follows(sess, node_id, &lhs_tt));
+        check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(&lhs_tt)));
+        if let Err(e) = p.expect(exp!(FatArrow)) {
+            return dummy_syn_ext(e.emit());
+        }
+        let rhs_tt = p.parse_token_tree();
+        let rhs_tt = mbe::quoted::parse(
+            &TokenStream::new(vec![rhs_tt]),
+            false, // RHS
+            sess,
+            node_id,
+            features,
+            edition,
+        )
+        .pop()
+        .unwrap();
+        check_emission(check_rhs(sess, &rhs_tt));
+        lhses.push(lhs_tt);
+        rhses.push(rhs_tt);
+        if p.token == token::Eof {
+            break;
+        }
+        if let Err(e) = p.expect(exp_sep) {
+            return dummy_syn_ext(e.emit());
+        }
     }
 
-    // Don't abort iteration early, so that errors for multiple lhses can be reported.
-    for lhs in &lhses {
-        check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(lhs)));
+    if lhses.is_empty() {
+        let guar = sess.dcx().span_err(span, "macros must contain at least one rule");
+        return dummy_syn_ext(guar);
     }
 
     check_emission(macro_check::check_meta_variables(&sess.psess, node_id, span, &lhses, &rhses));
author	Josh Triplett <josh@joshtriplett.org>	2025-06-26 14:19:15 -0700
committer	Josh Triplett <josh@joshtriplett.org>	2025-06-26 15:20:42 -0700
commit	6c04e0a7aeeee15fc11759dd563aecfe6df194a0 (patch)
tree	004b9b39b34533d0f891808d4bd92b9dd06243d7 /compiler/rustc_expand/src
parent	b03b3a7ec92682be2917540b679478d41c95a30c (diff)
download	rust-6c04e0a7aeeee15fc11759dd563aecfe6df194a0.tar.gz rust-6c04e0a7aeeee15fc11759dd563aecfe6df194a0.zip