Rewrite `macro_rules!` parser to not use the MBE engine itself

The `macro_rules!` parser was written to match the series of rules using the macros-by-example (MBE) engine and a hand-written equivalent of the left-hand side of a MBE macro. This was complex to read, difficult to extend, and produced confusing error messages. Because it was using the MBE engine, any parse failure would be reported as if some macro was being applied to the `macro_rules!` invocation itself; for instance, errors would talk about "macro invocation", "macro arguments", and "macro call", when they were actually about the macro *definition*. And in practice, the `macro_rules!` parser only used the MBE engine to extract the left-hand side and right-hand side of each rule as a token tree, and then parsed the rest using a separate parser. Rewrite it to parse the series of rules using a simple loop, instead. This makes it more extensible in the future, and improves error messages. For instance, omitting a semicolon between rules will result in "expected `;`" and "unexpected token", rather than the confusing "no rules expected this token in macro call". This work was greatly aided by pair programming with Vincenzo Palazzo and Eric Holk.
author: Josh Triplett <josh@joshtriplett.org> 2025-06-26 14:19:15 -0700
committer: Josh Triplett <josh@joshtriplett.org> 2025-06-26 15:20:42 -0700
commit: 6c04e0a7aeeee15fc11759dd563aecfe6df194a0 (patch)
tree: 004b9b39b34533d0f891808d4bd92b9dd06243d7
parent: b03b3a7ec92682be2917540b679478d41c95a30c (diff)
download: rust-6c04e0a7aeeee15fc11759dd563aecfe6df194a0.tar.gz
rust-6c04e0a7aeeee15fc11759dd563aecfe6df194a0.zip
11 files changed, 65 insertions, 199 deletions
diff --git a/compiler/rustc_expand/src/mbe/diagnostics.rs b/compiler/rustc_expand/src/mbe/diagnostics.rs
index 99aa376626d..c607a3a3652 100644
--- a/compiler/rustc_expand/src/mbe/diagnostics.rs
+++ b/compiler/rustc_expand/src/mbe/diagnostics.rs
@@ -195,38 +195,6 @@ impl<'dcx> CollectTrackerAndEmitter<'dcx, '_> {
     }
 }
 
-/// Currently used by macro_rules! compilation to extract a little information from the `Failure`
-/// case.
-pub(crate) struct FailureForwarder<'matcher> {
-    expected_token: Option<&'matcher Token>,
-}
-
-impl<'matcher> FailureForwarder<'matcher> {
-    pub(crate) fn new() -> Self {
-        Self { expected_token: None }
-    }
-}
-
-impl<'matcher> Tracker<'matcher> for FailureForwarder<'matcher> {
-    type Failure = (Token, u32, &'static str);
-
-    fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure {
-        (tok, position, msg)
-    }
-
-    fn description() -> &'static str {
-        "failure-forwarder"
-    }
-
-    fn set_expected_token(&mut self, tok: &'matcher Token) {
-        self.expected_token = Some(tok);
-    }
-
-    fn get_expected_token(&self) -> Option<&'matcher Token> {
-        self.expected_token
-    }
-}
-
 pub(super) fn emit_frag_parse_err(
     mut e: Diag<'_>,
     parser: &Parser<'_>,
@@ -321,7 +289,7 @@ enum ExplainDocComment {
     },
 }
 
-pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) {
+fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) {
     if let Ok(src) = sm.span_to_snippet(span) {
         if src.starts_with("///") || src.starts_with("/**") {
             err.subdiagnostic(ExplainDocComment::Outer { span });
@@ -333,7 +301,7 @@ pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Spa
 
 /// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
 /// other tokens, this is "unexpected token...".
-pub(super) fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> {
+fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> {
     if let Some(expected_token) = expected_token {
         Cow::from(format!("expected {}, found {}", token_descr(expected_token), token_descr(tok)))
     } else {
diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
index 802e43209a5..3f1fc841ea3 100644
--- a/compiler/rustc_expand/src/mbe/macro_parser.rs
+++ b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -536,8 +536,6 @@ impl TtParser {
                         // The separator matches the current token. Advance past it.
                         mp.idx += 1;
                         self.next_mps.push(mp);
-                    } else {
-                        track.set_expected_token(separator);
                     }
                 }
                 &MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {
diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs
index 432ab324740..234e0257530 100644
--- a/compiler/rustc_expand/src/mbe/macro_rules.rs
+++ b/compiler/rustc_expand/src/mbe/macro_rules.rs
@@ -19,12 +19,13 @@ use rustc_lint_defs::BuiltinLintDiag;
 use rustc_lint_defs::builtin::{
     RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
 };
-use rustc_parse::parser::{ParseNtResult, Parser, Recovery};
+use rustc_parse::exp;
+use rustc_parse::parser::{Parser, Recovery};
 use rustc_session::Session;
 use rustc_session::parse::ParseSess;
 use rustc_span::edition::Edition;
 use rustc_span::hygiene::Transparency;
-use rustc_span::{Ident, MacroRulesNormalizedIdent, Span, kw, sym};
+use rustc_span::{Ident, Span, kw, sym};
 use tracing::{debug, instrument, trace, trace_span};
 
 use super::macro_parser::{NamedMatches, NamedParseResult};
@@ -34,8 +35,6 @@ use crate::base::{
     SyntaxExtensionKind, TTMacroExpander,
 };
 use crate::expand::{AstFragment, AstFragmentKind, ensure_complete_parse, parse_ast_fragment};
-use crate::mbe::diagnostics::{annotate_doc_comment, parse_failure_msg};
-use crate::mbe::macro_parser::NamedMatch::*;
 use crate::mbe::macro_parser::{Error, ErrorReported, Failure, MatcherLoc, Success, TtParser};
 use crate::mbe::transcribe::transcribe;
 use crate::mbe::{self, KleeneOp, macro_check};
@@ -168,11 +167,6 @@ pub(super) trait Tracker<'matcher> {
     fn recovery() -> Recovery {
         Recovery::Forbidden
     }
-
-    fn set_expected_token(&mut self, _tok: &'matcher Token) {}
-    fn get_expected_token(&self) -> Option<&'matcher Token> {
-        None
-    }
 }
 
 /// A noop tracker that is used in the hot path of the expansion, has zero overhead thanks to
@@ -360,11 +354,6 @@ pub(super) fn try_match_macro<'matcher, T: Tracker<'matcher>>(
     Err(CanRetry::Yes)
 }
 
-// Note that macro-by-example's input is also matched against a token tree:
-//                   $( $lhs:tt => $rhs:tt );+
-//
-// Holy self-referential!
-
 /// Converts a macro item into a syntax extension.
 pub fn compile_declarative_macro(
     sess: &Session,
@@ -390,153 +379,63 @@ pub fn compile_declarative_macro(
     };
     let dummy_syn_ext = |guar| (mk_syn_ext(Arc::new(DummyExpander(guar))), Vec::new());
 
-    let lhs_nm = Ident::new(sym::lhs, span);
-    let rhs_nm = Ident::new(sym::rhs, span);
-    let tt_spec = NonterminalKind::TT;
     let macro_rules = macro_def.macro_rules;
+    let exp_sep = if macro_rules { exp!(Semi) } else { exp!(Comma) };
 
-    // Parse the macro_rules! invocation
-
-    // The pattern that macro_rules matches.
-    // The grammar for macro_rules! is:
-    // $( $lhs:tt => $rhs:tt );+
-    // ...quasiquoting this would be nice.
-    // These spans won't matter, anyways
-    let argument_gram = vec![
-        mbe::TokenTree::Sequence(
-            DelimSpan::dummy(),
-            mbe::SequenceRepetition {
-                tts: vec![
-                    mbe::TokenTree::MetaVarDecl { span, name: lhs_nm, kind: tt_spec },
-                    mbe::TokenTree::token(token::FatArrow, span),
-                    mbe::TokenTree::MetaVarDecl { span, name: rhs_nm, kind: tt_spec },
-                ],
-                separator: Some(Token::new(
-                    if macro_rules { token::Semi } else { token::Comma },
-                    span,
-                )),
-                kleene: mbe::KleeneToken::new(mbe::KleeneOp::OneOrMore, span),
-                num_captures: 2,
-            },
-        ),
-        // to phase into semicolon-termination instead of semicolon-separation
-        mbe::TokenTree::Sequence(
-            DelimSpan::dummy(),
-            mbe::SequenceRepetition {
-                tts: vec![mbe::TokenTree::token(
-                    if macro_rules { token::Semi } else { token::Comma },
-                    span,
-                )],
-                separator: None,
-                kleene: mbe::KleeneToken::new(mbe::KleeneOp::ZeroOrMore, span),
-                num_captures: 0,
-            },
-        ),
-    ];
-    // Convert it into `MatcherLoc` form.
-    let argument_gram = mbe::macro_parser::compute_locs(&argument_gram);
-
-    let create_parser = || {
-        let body = macro_def.body.tokens.clone();
-        Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS)
-    };
-
-    let parser = create_parser();
-    let mut tt_parser =
-        TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro }));
-    let argument_map =
-        match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
-            Success(m) => m,
-            Failure(()) => {
-                debug!("failed to parse macro tt");
-                // The fast `NoopTracker` doesn't have any info on failure, so we need to retry it
-                // with another one that gives us the information we need.
-                // For this we need to reclone the macro body as the previous parser consumed it.
-                let retry_parser = create_parser();
-
-                let mut track = diagnostics::FailureForwarder::new();
-                let parse_result =
-                    tt_parser.parse_tt(&mut Cow::Owned(retry_parser), &argument_gram, &mut track);
-                let Failure((token, _, msg)) = parse_result else {
-                    unreachable!("matcher returned something other than Failure after retry");
-                };
-
-                let s = parse_failure_msg(&token, track.get_expected_token());
-                let sp = token.span.substitute_dummy(span);
-                let mut err = sess.dcx().struct_span_err(sp, s);
-                err.span_label(sp, msg);
-                annotate_doc_comment(&mut err, sess.source_map(), sp);
-                let guar = err.emit();
-                return dummy_syn_ext(guar);
-            }
-            Error(sp, msg) => {
-                let guar = sess.dcx().span_err(sp.substitute_dummy(span), msg);
-                return dummy_syn_ext(guar);
-            }
-            ErrorReported(guar) => {
-                return dummy_syn_ext(guar);
-            }
-        };
+    let body = macro_def.body.tokens.clone();
+    let mut p = Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS);
 
+    // Don't abort iteration early, so that multiple errors can be reported.
     let mut guar = None;
     let mut check_emission = |ret: Result<(), ErrorGuaranteed>| guar = guar.or(ret.err());
 
-    // Extract the arguments:
-    let lhses = match &argument_map[&MacroRulesNormalizedIdent::new(lhs_nm)] {
-        MatchedSeq(s) => s
-            .iter()
-            .map(|m| {
-                if let MatchedSingle(ParseNtResult::Tt(tt)) = m {
-                    let tt = mbe::quoted::parse(
-                        &TokenStream::new(vec![tt.clone()]),
-                        true,
-                        sess,
-                        node_id,
-                        features,
-                        edition,
-                    )
-                    .pop()
-                    .unwrap();
-                    // We don't handle errors here, the driver will abort
-                    // after parsing/expansion. We can report every error in every macro this way.
-                    check_emission(check_lhs_nt_follows(sess, node_id, &tt));
-                    return tt;
-                }
-                sess.dcx().span_bug(span, "wrong-structured lhs")
-            })
-            .collect::<Vec<mbe::TokenTree>>(),
-        _ => sess.dcx().span_bug(span, "wrong-structured lhs"),
-    };
-
-    let rhses = match &argument_map[&MacroRulesNormalizedIdent::new(rhs_nm)] {
-        MatchedSeq(s) => s
-            .iter()
-            .map(|m| {
-                if let MatchedSingle(ParseNtResult::Tt(tt)) = m {
-                    return mbe::quoted::parse(
-                        &TokenStream::new(vec![tt.clone()]),
-                        false,
-                        sess,
-                        node_id,
-                        features,
-                        edition,
-                    )
-                    .pop()
-                    .unwrap();
-                }
-                sess.dcx().span_bug(span, "wrong-structured rhs")
-            })
-            .collect::<Vec<mbe::TokenTree>>(),
-        _ => sess.dcx().span_bug(span, "wrong-structured rhs"),
-    };
+    let mut lhses = Vec::new();
+    let mut rhses = Vec::new();
 
-    for rhs in &rhses {
-        check_emission(check_rhs(sess, rhs));
+    while p.token != token::Eof {
+        let lhs_tt = p.parse_token_tree();
+        let lhs_tt = mbe::quoted::parse(
+            &TokenStream::new(vec![lhs_tt]),
+            true, // LHS
+            sess,
+            node_id,
+            features,
+            edition,
+        )
+        .pop()
+        .unwrap();
+        // We don't handle errors here, the driver will abort after parsing/expansion. We can
+        // report every error in every macro this way.
+        check_emission(check_lhs_nt_follows(sess, node_id, &lhs_tt));
+        check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(&lhs_tt)));
+        if let Err(e) = p.expect(exp!(FatArrow)) {
+            return dummy_syn_ext(e.emit());
+        }
+        let rhs_tt = p.parse_token_tree();
+        let rhs_tt = mbe::quoted::parse(
+            &TokenStream::new(vec![rhs_tt]),
+            false, // RHS
+            sess,
+            node_id,
+            features,
+            edition,
+        )
+        .pop()
+        .unwrap();
+        check_emission(check_rhs(sess, &rhs_tt));
+        lhses.push(lhs_tt);
+        rhses.push(rhs_tt);
+        if p.token == token::Eof {
+            break;
+        }
+        if let Err(e) = p.expect(exp_sep) {
+            return dummy_syn_ext(e.emit());
+        }
     }
 
-    // Don't abort iteration early, so that errors for multiple lhses can be reported.
-    for lhs in &lhses {
-        check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(lhs)));
+    if lhses.is_empty() {
+        let guar = sess.dcx().span_err(span, "macros must contain at least one rule");
+        return dummy_syn_ext(guar);
     }
 
     check_emission(macro_check::check_meta_variables(&sess.psess, node_id, span, &lhses, &rhses));
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 11463ad354a..c17411d55f7 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -1777,7 +1777,6 @@ symbols! {
         resume,
         return_position_impl_trait_in_trait,
         return_type_notation,
-        rhs,
         riscv_target_feature,
         rlib,
         ropi,
diff --git a/tests/ui/attributes/crate-type-macro-empty.rs b/tests/ui/attributes/crate-type-macro-empty.rs
index 5ff7fc002fd..217ff598f7a 100644
--- a/tests/ui/attributes/crate-type-macro-empty.rs
+++ b/tests/ui/attributes/crate-type-macro-empty.rs
@@ -2,6 +2,6 @@
 #[crate_type = foo!()]
 //~^ ERROR cannot find macro `foo` in this scope
 
-macro_rules! foo {} //~ ERROR unexpected end of macro invocation
+macro_rules! foo {} //~ ERROR macros must contain at least one rule
 
 fn main() {}
diff --git a/tests/ui/attributes/crate-type-macro-empty.stderr b/tests/ui/attributes/crate-type-macro-empty.stderr
index e48d3d95470..130fa454ca1 100644
--- a/tests/ui/attributes/crate-type-macro-empty.stderr
+++ b/tests/ui/attributes/crate-type-macro-empty.stderr
@@ -1,8 +1,8 @@
-error: unexpected end of macro invocation
+error: macros must contain at least one rule
   --> $DIR/crate-type-macro-empty.rs:5:1
    |
 LL | macro_rules! foo {}
-   | ^^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments
+   | ^^^^^^^^^^^^^^^^^^^
 
 error: cannot find macro `foo` in this scope
   --> $DIR/crate-type-macro-empty.rs:2:16
diff --git a/tests/ui/macros/missing-semi.stderr b/tests/ui/macros/missing-semi.stderr
index 0a7afe50059..c2e12adbb4b 100644
--- a/tests/ui/macros/missing-semi.stderr
+++ b/tests/ui/macros/missing-semi.stderr
@@ -1,8 +1,10 @@
 error: expected `;`, found `(`
   --> $DIR/missing-semi.rs:6:5
    |
+LL |     }
+   |      - expected `;`
 LL |     () => {
-   |     ^ no rules expected this token in macro call
+   |     ^ unexpected token
 
 error: aborting due to 1 previous error
 
diff --git a/tests/ui/parser/issues/issue-7970b.rs b/tests/ui/parser/issues/issue-7970b.rs
index 1c4abce3959..ae06aff7cef 100644
--- a/tests/ui/parser/issues/issue-7970b.rs
+++ b/tests/ui/parser/issues/issue-7970b.rs
@@ -1,4 +1,4 @@
 fn main() {}
 
 macro_rules! test {}
-//~^ ERROR unexpected end of macro invocation
+//~^ ERROR macros must contain at least one rule
diff --git a/tests/ui/parser/issues/issue-7970b.stderr b/tests/ui/parser/issues/issue-7970b.stderr
index b23b09e752c..4715eb07c6d 100644
--- a/tests/ui/parser/issues/issue-7970b.stderr
+++ b/tests/ui/parser/issues/issue-7970b.stderr
@@ -1,8 +1,8 @@
-error: unexpected end of macro invocation
+error: macros must contain at least one rule
   --> $DIR/issue-7970b.rs:3:1
    |
 LL | macro_rules! test {}
-   | ^^^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments
+   | ^^^^^^^^^^^^^^^^^^^^
 
 error: aborting due to 1 previous error
 
diff --git a/tests/ui/parser/macros-no-semicolon-items.rs b/tests/ui/parser/macros-no-semicolon-items.rs
index 3afc275d61a..86889279cea 100644
--- a/tests/ui/parser/macros-no-semicolon-items.rs
+++ b/tests/ui/parser/macros-no-semicolon-items.rs
@@ -1,5 +1,5 @@
 macro_rules! foo()  //~ ERROR semicolon
-                    //~| ERROR unexpected end of macro
+                    //~| ERROR macros must contain at least one rule
 
 macro_rules! bar {
     ($($tokens:tt)*) => {}
diff --git a/tests/ui/parser/macros-no-semicolon-items.stderr b/tests/ui/parser/macros-no-semicolon-items.stderr
index 07fa2439df5..f8f3ed83688 100644
--- a/tests/ui/parser/macros-no-semicolon-items.stderr
+++ b/tests/ui/parser/macros-no-semicolon-items.stderr
@@ -38,11 +38,11 @@ help: add a semicolon
 LL | );
    |  +
 
-error: unexpected end of macro invocation
+error: macros must contain at least one rule
   --> $DIR/macros-no-semicolon-items.rs:1:1
    |
 LL | macro_rules! foo()
-   | ^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments
+   | ^^^^^^^^^^^^^^^^^^
 
 error: aborting due to 3 previous errors
author	Josh Triplett <josh@joshtriplett.org>	2025-06-26 14:19:15 -0700
committer	Josh Triplett <josh@joshtriplett.org>	2025-06-26 15:20:42 -0700
commit	6c04e0a7aeeee15fc11759dd563aecfe6df194a0 (patch)
tree	004b9b39b34533d0f891808d4bd92b9dd06243d7
parent	b03b3a7ec92682be2917540b679478d41c95a30c (diff)
download	rust-6c04e0a7aeeee15fc11759dd563aecfe6df194a0.tar.gz rust-6c04e0a7aeeee15fc11759dd563aecfe6df194a0.zip