about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2022-04-02 04:59:16 +0000
committerbors <bors@rust-lang.org>2022-04-02 04:59:16 +0000
commit95f68702ff927231ef6d37c03a8c3fcd2b6cf59b (patch)
tree522ecbd9a4744a9cdf986704ac3aa0a78b33923a
parent79f178b76ea9d5c6182f67413f62dd86b0e38508 (diff)
parentc6fedd4f1006069786a28e2054442c188408e194 (diff)
downloadrust-95f68702ff927231ef6d37c03a8c3fcd2b6cf59b.tar.gz
rust-95f68702ff927231ef6d37c03a8c3fcd2b6cf59b.zip
Auto merge of #95509 - nnethercote:simplify-MatcherPos-some-more, r=petrochenkov
Simplify `MatcherPos` some more

A few more improvements.

r? `@petrochenkov`
-rw-r--r--compiler/rustc_expand/src/lib.rs1
-rw-r--r--compiler/rustc_expand/src/mbe/macro_parser.rs214
2 files changed, 120 insertions, 95 deletions
diff --git a/compiler/rustc_expand/src/lib.rs b/compiler/rustc_expand/src/lib.rs
index 791ba560607..f5a93905b82 100644
--- a/compiler/rustc_expand/src/lib.rs
+++ b/compiler/rustc_expand/src/lib.rs
@@ -1,5 +1,6 @@
 #![feature(associated_type_bounds)]
 #![feature(associated_type_defaults)]
+#![feature(box_patterns)]
 #![feature(box_syntax)]
 #![feature(crate_visibility_modifier)]
 #![feature(decl_macro)]
diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs
index 0086983f3d9..7d4de72c3fe 100644
--- a/compiler/rustc_expand/src/mbe/macro_parser.rs
+++ b/compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -75,7 +75,7 @@ crate use ParseResult::*;
 
 use crate::mbe::{self, SequenceRepetition, TokenTree};
 
-use rustc_ast::token::{self, DocComment, Nonterminal, Token};
+use rustc_ast::token::{self, DocComment, Nonterminal, Token, TokenKind};
 use rustc_parse::parser::{NtOrTt, Parser};
 use rustc_session::parse::ParseSess;
 use rustc_span::symbol::MacroRulesNormalizedIdent;
@@ -87,17 +87,6 @@ use rustc_data_structures::sync::Lrc;
 use rustc_span::symbol::Ident;
 use std::borrow::Cow;
 use std::collections::hash_map::Entry::{Occupied, Vacant};
-use std::mem;
-
-/// This is used by `parse_tt_inner` to keep track of delimited submatchers that we have
-/// descended into.
-#[derive(Clone)]
-struct MatcherPosFrame<'tt> {
-    /// The "parent" matcher that we have descended from.
-    tts: &'tt [TokenTree],
-    /// The position of the "dot" in `tt` at the time we descended.
-    idx: usize,
-}
 
 // One element is enough to cover 95-99% of vectors for most benchmarks. Also,
 // vectors longer than one frequently have many elements, not just two or
@@ -108,6 +97,33 @@ type NamedMatchVec = SmallVec<[NamedMatch; 1]>;
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
 rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
 
+#[derive(Clone)]
+enum MatcherKind<'tt> {
+    TopLevel,
+    Delimited(Box<DelimitedSubmatcher<'tt>>),
+    Sequence(Box<SequenceSubmatcher<'tt>>),
+}
+
+#[derive(Clone)]
+struct DelimitedSubmatcher<'tt> {
+    parent: Parent<'tt>,
+}
+
+#[derive(Clone)]
+struct SequenceSubmatcher<'tt> {
+    parent: Parent<'tt>,
+    seq: &'tt SequenceRepetition,
+}
+
+/// Data used to ascend from a submatcher back to its parent matcher. A subset of the fields from
+/// `MathcherPos`.
+#[derive(Clone)]
+struct Parent<'tt> {
+    tts: &'tt [TokenTree],
+    idx: usize,
+    kind: MatcherKind<'tt>,
+}
+
 /// A single matcher position, which could be within the top-level matcher, a submatcher, a
 /// subsubmatcher, etc. For example:
 /// ```text
@@ -116,13 +132,14 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
 ///                            <-------------->   first submatcher; three tts, zero metavars
 ///                  <--------------------------> top-level matcher; two tts, one metavar
 /// ```
-#[derive(Clone)]
 struct MatcherPos<'tt> {
     /// The tokens that make up the current matcher. When we are within a `Sequence` or `Delimited`
     /// submatcher, this is just the contents of that submatcher.
     tts: &'tt [TokenTree],
 
-    /// The "dot" position within the current submatcher, i.e. the index into `tts`.
+    /// The "dot" position within the current submatcher, i.e. the index into `tts`. Can go one or
+    /// two positions past the final elements in `tts` when dealing with sequences, see
+    /// `parse_tt_inner` for details.
     idx: usize,
 
     /// This vector ends up with one element per metavar in the *top-level* matcher, even when this
@@ -134,25 +151,18 @@ struct MatcherPos<'tt> {
     /// The number of sequences this mp is within.
     seq_depth: usize,
 
-    /// The position in `matches` of the first metavar in this (sub)matcher. Zero if there are
-    /// no metavars.
-    match_lo: usize,
-
     /// The position in `matches` of the next metavar to be matched against the source token
     /// stream. Should not be used if there are no metavars.
     match_cur: usize,
 
-    /// This field is only used if we are matching a sequence.
-    sequence: Option<MatcherPosSequence<'tt>>,
-
-    /// When we are within a `Delimited` submatcher (or subsubmatcher), this tracks the parent
-    /// matcher(s). The bottom of the stack is the top-level matcher.
-    stack: SmallVec<[MatcherPosFrame<'tt>; 1]>,
+    /// What kind of matcher we are in. For submatchers, this contains enough information to
+    /// reconstitute a `MatcherPos` within the parent once we ascend out of the submatcher.
+    kind: MatcherKind<'tt>,
 }
 
 // This type is used a lot. Make sure it doesn't unintentionally get bigger.
 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
-rustc_data_structures::static_assert_size!(MatcherPos<'_>, 104);
+rustc_data_structures::static_assert_size!(MatcherPos<'_>, 64);
 
 impl<'tt> MatcherPos<'tt> {
     fn top_level(matcher: &'tt [TokenTree], empty_matches: Lrc<NamedMatchVec>) -> Self {
@@ -161,31 +171,50 @@ impl<'tt> MatcherPos<'tt> {
             idx: 0,
             matches: empty_matches,
             seq_depth: 0,
-            match_lo: 0,
             match_cur: 0,
-            stack: smallvec![],
-            sequence: None,
+            kind: MatcherKind::TopLevel,
         }
     }
 
+    fn empty_sequence(
+        parent_mp: &MatcherPos<'tt>,
+        seq: &'tt SequenceRepetition,
+        empty_matches: Lrc<NamedMatchVec>,
+    ) -> Self {
+        let mut mp = MatcherPos {
+            tts: parent_mp.tts,
+            idx: parent_mp.idx + 1,
+            matches: parent_mp.matches.clone(), // a cheap clone
+            seq_depth: parent_mp.seq_depth,
+            match_cur: parent_mp.match_cur + seq.num_captures,
+            kind: parent_mp.kind.clone(), // an expensive clone
+        };
+        for idx in parent_mp.match_cur..parent_mp.match_cur + seq.num_captures {
+            mp.push_match(idx, MatchedSeq(empty_matches.clone()));
+        }
+        mp
+    }
+
     fn sequence(
-        parent: Box<MatcherPos<'tt>>,
+        parent_mp: Box<MatcherPos<'tt>>,
         seq: &'tt SequenceRepetition,
         empty_matches: Lrc<NamedMatchVec>,
     ) -> Self {
+        let seq_kind = box SequenceSubmatcher {
+            parent: Parent { tts: parent_mp.tts, idx: parent_mp.idx, kind: parent_mp.kind },
+            seq,
+        };
         let mut mp = MatcherPos {
             tts: &seq.tts,
             idx: 0,
-            matches: parent.matches.clone(),
-            seq_depth: parent.seq_depth,
-            match_lo: parent.match_cur,
-            match_cur: parent.match_cur,
-            sequence: Some(MatcherPosSequence { parent, seq }),
-            stack: smallvec![],
+            matches: parent_mp.matches,
+            seq_depth: parent_mp.seq_depth,
+            match_cur: parent_mp.match_cur,
+            kind: MatcherKind::Sequence(seq_kind),
         };
         // Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth`
         // must have the parent's depth at this point for these `push_match` calls to work.
-        for idx in mp.match_lo..mp.match_lo + seq.num_captures {
+        for idx in mp.match_cur..mp.match_cur + seq.num_captures {
             mp.push_match(idx, MatchedSeq(empty_matches.clone()));
         }
         mp.seq_depth += 1;
@@ -226,16 +255,6 @@ impl<'tt> MatcherPos<'tt> {
     }
 }
 
-#[derive(Clone)]
-struct MatcherPosSequence<'tt> {
-    /// The parent matcher position. Effectively gives a linked list of matches all the way to the
-    /// top-level matcher.
-    parent: Box<MatcherPos<'tt>>,
-
-    /// The sequence itself.
-    seq: &'tt SequenceRepetition,
-}
-
 enum EofMatcherPositions<'tt> {
     None,
     One(Box<MatcherPos<'tt>>),
@@ -448,18 +467,6 @@ impl<'tt> TtParser<'tt> {
         let mut eof_mps = EofMatcherPositions::None;
 
         while let Some(mut mp) = self.cur_mps.pop() {
-            // Backtrack out of delimited submatcher when necessary. When backtracking out again,
-            // we need to advance the "dot" past the delimiters in the parent matcher(s).
-            while mp.idx >= mp.tts.len() {
-                match mp.stack.pop() {
-                    Some(MatcherPosFrame { tts, idx }) => {
-                        mp.tts = tts;
-                        mp.idx = idx + 1;
-                    }
-                    None => break,
-                }
-            }
-
             // Get the current position of the "dot" (`idx`) in `mp` and the number of token
             // trees in the matcher (`len`).
             let idx = mp.idx;
@@ -473,13 +480,11 @@ impl<'tt> TtParser<'tt> {
                         let op = seq.kleene.op;
                         if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne {
                             // Allow for the possibility of zero matches of this sequence.
-                            let mut new_mp = mp.clone();
-                            new_mp.match_cur += seq.num_captures;
-                            new_mp.idx += 1;
-                            for idx in mp.match_cur..mp.match_cur + seq.num_captures {
-                                new_mp.push_match(idx, MatchedSeq(self.empty_matches.clone()));
-                            }
-                            self.cur_mps.push(new_mp);
+                            self.cur_mps.push(box MatcherPos::empty_sequence(
+                                &*mp,
+                                &seq,
+                                self.empty_matches.clone(),
+                            ));
                         }
 
                         // Allow for the possibility of one or more matches of this sequence.
@@ -509,16 +514,17 @@ impl<'tt> TtParser<'tt> {
                     }
 
                     TokenTree::Delimited(_, delimited) => {
-                        // To descend into a delimited submatcher, we push the current matcher onto
-                        // a stack and push a new mp containing the submatcher onto `cur_mps`.
-                        //
-                        // At the beginning of the loop, if we reach the end of the delimited
-                        // submatcher, we pop the stack to backtrack out of the descent. Note that
-                        // we use `all_tts` to include the open and close delimiter tokens.
-                        let tts = mem::replace(&mut mp.tts, &delimited.all_tts);
-                        let idx = mp.idx;
-                        mp.stack.push(MatcherPosFrame { tts, idx });
+                        // To descend into a delimited submatcher, we update `mp` appropriately,
+                        // including enough information to re-ascend afterwards, and push it onto
+                        // `cur_mps`. Later, when we reach the closing delimiter, we will recover
+                        // the parent matcher position to ascend. Note that we use `all_tts` to
+                        // include the open and close delimiter tokens.
+                        let kind = MatcherKind::Delimited(box DelimitedSubmatcher {
+                            parent: Parent { tts: mp.tts, idx: mp.idx, kind: mp.kind },
+                        });
+                        mp.tts = &delimited.all_tts;
                         mp.idx = 0;
+                        mp.kind = kind;
                         self.cur_mps.push(mp);
                     }
 
@@ -536,6 +542,18 @@ impl<'tt> TtParser<'tt> {
                             mp.idx += 1;
                             self.cur_mps.push(mp);
                         } else if token_name_eq(&t, token) {
+                            if let TokenKind::CloseDelim(_) = token.kind {
+                                // Ascend out of the delimited submatcher.
+                                debug_assert_eq!(idx, len - 1);
+                                match mp.kind {
+                                    MatcherKind::Delimited(submatcher) => {
+                                        mp.tts = submatcher.parent.tts;
+                                        mp.idx = submatcher.parent.idx;
+                                        mp.kind = submatcher.parent.kind;
+                                    }
+                                    _ => unreachable!(),
+                                }
+                            }
                             mp.idx += 1;
                             self.next_mps.push(mp);
                         }
@@ -544,38 +562,44 @@ impl<'tt> TtParser<'tt> {
                     // These cannot appear in a matcher.
                     TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
                 }
-            } else if let Some(sequence) = &mp.sequence {
+            } else if let MatcherKind::Sequence(box SequenceSubmatcher { parent, seq }) = &mp.kind {
                 // We are past the end of a sequence.
-                debug_assert!(idx <= len + 1);
+                // - If it has no separator, we must be only one past the end.
+                // - If it has a separator, we may be one past the end, in which case we must
+                //   look for a separator. Or we may be two past the end, in which case we have
+                //   already dealt with the separator.
+                debug_assert!(idx == len || idx == len + 1 && seq.separator.is_some());
 
                 if idx == len {
-                    // Add all matches from the sequence to `parent`, and move the "dot" past the
-                    // sequence in `parent`. This allows for the case where the sequence matching
-                    // is finished.
-                    let mut new_mp = sequence.parent.clone();
-                    new_mp.matches = mp.matches.clone();
-                    new_mp.match_cur = mp.match_lo + sequence.seq.num_captures;
-                    new_mp.idx += 1;
+                    // Sequence matching may have finished: move the "dot" past the sequence in
+                    // `parent`. This applies whether a separator is used or not. If sequence
+                    // matching hasn't finished, this `new_mp` will fail quietly when it is
+                    // processed next time around the loop.
+                    let new_mp = box MatcherPos {
+                        tts: parent.tts,
+                        idx: parent.idx + 1,
+                        matches: mp.matches.clone(), // a cheap clone
+                        seq_depth: mp.seq_depth - 1,
+                        match_cur: mp.match_cur,
+                        kind: parent.kind.clone(), // an expensive clone
+                    };
                     self.cur_mps.push(new_mp);
                 }
 
-                if idx == len && sequence.seq.separator.is_some() {
-                    if sequence
-                        .seq
-                        .separator
-                        .as_ref()
-                        .map_or(false, |sep| token_name_eq(token, sep))
-                    {
+                if seq.separator.is_some() && idx == len {
+                    // Look for the separator.
+                    if seq.separator.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
                         // The matcher has a separator, and it matches the current token. We can
                         // advance past the separator token.
                         mp.idx += 1;
                         self.next_mps.push(mp);
                     }
-                } else if sequence.seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
-                    // We don't need a separator. Move the "dot" back to the beginning of the
-                    // matcher and try to match again UNLESS we are only allowed to have _one_
-                    // repetition.
-                    mp.match_cur = mp.match_lo;
+                } else if seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
+                    // We don't need to look for a separator: either this sequence doesn't have
+                    // one, or it does and we've already handled it. Also, we are allowed to have
+                    // more than one repetition. Move the "dot" back to the beginning of the
+                    // matcher and try to match again.
+                    mp.match_cur -= seq.num_captures;
                     mp.idx = 0;
                     self.cur_mps.push(mp);
                 }