diff options
| author | Nicholas Nethercote <n.nethercote@gmail.com> | 2022-03-28 17:13:56 +1100 |
|---|---|---|
| committer | Nicholas Nethercote <n.nethercote@gmail.com> | 2022-03-30 10:54:37 +1100 |
| commit | 524d21bd5495ad9941597701e7e924ade3f60b53 (patch) | |
| tree | a1c50dfac7c6d4ad29df20a15acaa425b85c895c | |
| parent | a1b140cdb70e1bcdffa9e6671b17fd69ef80c4f6 (diff) | |
| download | rust-524d21bd5495ad9941597701e7e924ade3f60b53.tar.gz rust-524d21bd5495ad9941597701e7e924ade3f60b53.zip | |
Overhaul how matches are recorded.
Currently, matches within a sequence are recorded in a new empty `matches` vector. Then when the sequence finishes the matches are merged into the `matches` vector of the parent. This commit changes things so that a sequence mp inherits the matches made so far. This means that additional matches from the sequence don't need to be merged into the parent. `push_match` becomes more complicated, and the current sequence depth needs to be tracked. But it's a sizeable performance win because it avoids one or more `push_match` calls on every iteration of a sequence. The commit also removes `match_hi`, which is no longer necessary.
| -rw-r--r-- | compiler/rustc_expand/src/mbe/macro_parser.rs | 103 |
1 files changed, 55 insertions, 48 deletions
diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index bbd83d0bc3e..15eb1635e24 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -125,33 +125,23 @@ struct MatcherPos<'tt> { /// The "dot" position within the current submatcher, i.e. the index into `tts`. idx: usize, - /// This boxed slice has one element per metavar in the *top-level* matcher, even when this + /// This vector ends up with one element per metavar in the *top-level* matcher, even when this /// `MatcherPos` is for a submatcher. Each element records token trees matched against the - /// relevant metavar by the black box parser. - /// - /// In a top-level `MatcherPos` each `NamedMatchVec` will have zero elements before processing - /// and one element after processing; the one element will be a `MatchedSeq` if the + /// relevant metavar by the black box parser. The element will be a `MatchedSeq` if the /// corresponding metavar is within a sequence. - /// - /// In a sequence submatcher each `NamedMatchVec` will have zero elements before processing and - /// any number of elements after processing (as allowed by the sequence's Kleene op, i.e. - /// zero-or-one, zero-or-more, one-or-more). After processing these elements will be merged - /// into the parent `MatcherPos`'s matches (within a `MatchedSeq`). - matches: Box<[Lrc<NamedMatchVec>]>, + matches: Lrc<NamedMatchVec>, + + /// The number of sequences this mp is within. + seq_depth: usize, /// The position in `matches` of the first metavar in this (sub)matcher. Zero if there are /// no metavars. match_lo: usize, /// The position in `matches` of the next metavar to be matched against the source token - /// stream. `match_lo <= match_cur <= match_hi`. Should not be used if there are no metavars, - /// i.e. `match_lo == match_hi`. + /// stream. Should not be used if there are no metavars. match_cur: usize, - /// The position in `matches` one past the last metavar in this (sub)matcher. Equal to - /// `match_lo` if there are not metavars. - match_hi: usize, - /// This field is only used if we are matching a sequence. sequence: Option<MatcherPosSequence<'tt>>, @@ -162,50 +152,73 @@ struct MatcherPos<'tt> { // This type is used a lot. Make sure it doesn't unintentionally get bigger. #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] -rustc_data_structures::static_assert_size!(MatcherPos<'_>, 112); +rustc_data_structures::static_assert_size!(MatcherPos<'_>, 104); impl<'tt> MatcherPos<'tt> { - fn empty_matches(len: usize) -> Box<[Lrc<NamedMatchVec>]> { - if len == 0 { - vec![] - } else { - let empty_matches = Lrc::new(SmallVec::new()); - vec![empty_matches; len] - } - .into_boxed_slice() - } - fn top_level(matcher: &'tt [TokenTree]) -> Self { - let match_idx_hi = count_metavar_decls(matcher); MatcherPos { tts: matcher, idx: 0, - matches: Self::empty_matches(match_idx_hi), + matches: Lrc::new(smallvec![]), + seq_depth: 0, match_lo: 0, match_cur: 0, - match_hi: match_idx_hi, stack: smallvec![], sequence: None, } } fn sequence(parent: Box<MatcherPos<'tt>>, seq: &'tt SequenceRepetition) -> Self { - MatcherPos { + let mut mp = MatcherPos { tts: &seq.tts, idx: 0, - matches: Self::empty_matches(parent.matches.len()), + matches: parent.matches.clone(), + seq_depth: parent.seq_depth, match_lo: parent.match_cur, match_cur: parent.match_cur, - match_hi: parent.match_cur + seq.num_captures, sequence: Some(MatcherPosSequence { parent, seq }), stack: smallvec![], + }; + // Start with an empty vec for each metavar within the sequence. Note that `mp.seq_depth` + // must have the parent's depth at this point for these `push_match` calls to work. + for idx in mp.match_lo..mp.match_lo + seq.num_captures { + mp.push_match(idx, MatchedSeq(Lrc::new(smallvec![]))); } + mp.seq_depth += 1; + mp } /// Adds `m` as a named match for the `idx`-th metavar. fn push_match(&mut self, idx: usize, m: NamedMatch) { - let matches = Lrc::make_mut(&mut self.matches[idx]); - matches.push(m); + let matches = Lrc::make_mut(&mut self.matches); + match self.seq_depth { + 0 => { + // We are not within a sequence. Just append `m`. + assert_eq!(idx, matches.len()); + matches.push(m); + } + _ => { + // We are within a sequence. Find the final `MatchedSeq` at the appropriate depth + // and append `m` to its vector. + let mut curr = &mut matches[idx]; + for _ in 0..self.seq_depth - 1 { + match curr { + MatchedSeq(seq) => { + let seq = Lrc::make_mut(seq); + curr = seq.last_mut().unwrap(); + } + _ => unreachable!(), + } + } + match curr { + MatchedSeq(seq) => { + let seq = Lrc::make_mut(seq); + seq.push(m); + } + _ => unreachable!(), + } + } + } } } @@ -528,11 +541,8 @@ impl<'tt> TtParser<'tt> { // sequence in `parent`. This allows for the case where the sequence matching // is finished. let mut new_mp = sequence.parent.clone(); - for idx in mp.match_lo..mp.match_hi { - let sub = mp.matches[idx].clone(); - new_mp.push_match(idx, MatchedSeq(sub)); - } - new_mp.match_cur = mp.match_hi; + new_mp.matches = mp.matches.clone(); + new_mp.match_cur = mp.match_lo + sequence.seq.num_captures; new_mp.idx += 1; self.cur_mps.push(new_mp); } @@ -577,13 +587,10 @@ impl<'tt> TtParser<'tt> { if *token == token::Eof { Some(match eof_mps { EofMatcherPositions::One(mut eof_mp) => { - let matches = eof_mp.matches.iter_mut().map(|dv| { - // Top-level metavars only ever get one match. (Sub-matchers can get - // multiple matches, which get aggregated into a `MatcherSeq` before being - // put into the top-level.) - debug_assert_eq!(dv.len(), 1); - Lrc::make_mut(dv).pop().unwrap() - }); + assert_eq!(eof_mp.matches.len(), count_metavar_decls(matcher)); + // Need to take ownership of the matches from within the `Lrc`. + Lrc::make_mut(&mut eof_mp.matches); + let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter(); nameize(sess, matcher, matches) } EofMatcherPositions::Multiple => { |
