From 827a5b2ea8dc66bfdf817f52011f470f00bc6fee Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 22 Sep 2019 18:19:51 +0300 Subject: rename libsyntax::ext::tt to mbe mbe stands for macro-by-example --- src/libsyntax/ext/mbe/macro_check.rs | 626 ++++++++++++++++++ src/libsyntax/ext/mbe/macro_parser.rs | 952 ++++++++++++++++++++++++++ src/libsyntax/ext/mbe/macro_rules.rs | 1173 +++++++++++++++++++++++++++++++++ src/libsyntax/ext/mbe/quoted.rs | 433 ++++++++++++ src/libsyntax/ext/mbe/transcribe.rs | 398 +++++++++++ src/libsyntax/ext/tt/macro_check.rs | 626 ------------------ src/libsyntax/ext/tt/macro_parser.rs | 952 -------------------------- src/libsyntax/ext/tt/macro_rules.rs | 1173 --------------------------------- src/libsyntax/ext/tt/quoted.rs | 433 ------------ src/libsyntax/ext/tt/transcribe.rs | 398 ----------- 10 files changed, 3582 insertions(+), 3582 deletions(-) create mode 100644 src/libsyntax/ext/mbe/macro_check.rs create mode 100644 src/libsyntax/ext/mbe/macro_parser.rs create mode 100644 src/libsyntax/ext/mbe/macro_rules.rs create mode 100644 src/libsyntax/ext/mbe/quoted.rs create mode 100644 src/libsyntax/ext/mbe/transcribe.rs delete mode 100644 src/libsyntax/ext/tt/macro_check.rs delete mode 100644 src/libsyntax/ext/tt/macro_parser.rs delete mode 100644 src/libsyntax/ext/tt/macro_rules.rs delete mode 100644 src/libsyntax/ext/tt/quoted.rs delete mode 100644 src/libsyntax/ext/tt/transcribe.rs (limited to 'src/libsyntax/ext') diff --git a/src/libsyntax/ext/mbe/macro_check.rs b/src/libsyntax/ext/mbe/macro_check.rs new file mode 100644 index 00000000000..a1734689595 --- /dev/null +++ b/src/libsyntax/ext/mbe/macro_check.rs @@ -0,0 +1,626 @@ +//! Checks that meta-variables in macro definition are correctly declared and used. +//! +//! # What is checked +//! +//! ## Meta-variables must not be bound twice +//! +//! ``` +//! macro_rules! foo { ($x:tt $x:tt) => { $x }; } +//! ``` +//! +//! This check is sound (no false-negative) and complete (no false-positive). +//! +//! ## Meta-variables must not be free +//! +//! ``` +//! macro_rules! foo { () => { $x }; } +//! ``` +//! +//! This check is also done at macro instantiation but only if the branch is taken. +//! +//! ## Meta-variables must repeat at least as many times as their binder +//! +//! ``` +//! macro_rules! foo { ($($x:tt)*) => { $x }; } +//! ``` +//! +//! This check is also done at macro instantiation but only if the branch is taken. +//! +//! ## Meta-variables must repeat with the same Kleene operators as their binder +//! +//! ``` +//! macro_rules! foo { ($($x:tt)+) => { $($x)* }; } +//! ``` +//! +//! This check is not done at macro instantiation. +//! +//! # Disclaimer +//! +//! In the presence of nested macros (a macro defined in a macro), those checks may have false +//! positives and false negatives. We try to detect those cases by recognizing potential macro +//! definitions in RHSes, but nested macros may be hidden through the use of particular values of +//! meta-variables. +//! +//! ## Examples of false positive +//! +//! False positives can come from cases where we don't recognize a nested macro, because it depends +//! on particular values of meta-variables. In the following example, we think both instances of +//! `$x` are free, which is a correct statement if `$name` is anything but `macro_rules`. But when +//! `$name` is `macro_rules`, like in the instantiation below, then `$x:tt` is actually a binder of +//! the nested macro and `$x` is bound to it. +//! +//! ``` +//! macro_rules! 
foo { ($name:ident) => { $name! bar { ($x:tt) => { $x }; } }; } +//! foo!(macro_rules); +//! ``` +//! +//! False positives can also come from cases where we think there is a nested macro while there +//! isn't. In the following example, we think `$x` is free, which is incorrect because `bar` is not +//! a nested macro since it is not evaluated as code by `stringify!`. +//! +//! ``` +//! macro_rules! foo { () => { stringify!(macro_rules! bar { () => { $x }; }) }; } +//! ``` +//! +//! ## Examples of false negative +//! +//! False negatives can come from cases where we don't recognize a meta-variable, because it depends +//! on particular values of meta-variables. In the following examples, we don't see that if `$d` is +//! instantiated with `$` then `$d z` becomes `$z` in the nested macro definition and is thus a free +//! meta-variable. Note however, that if `foo` is instantiated, then we would check the definition +//! of `bar` and would see the issue. +//! +//! ``` +//! macro_rules! foo { ($d:tt) => { macro_rules! bar { ($y:tt) => { $d z }; } }; } +//! ``` +//! +//! # How it is checked +//! +//! There are 3 main functions: `check_binders`, `check_occurrences`, and `check_nested_macro`. They +//! all need some kind of environment. +//! +//! ## Environments +//! +//! Environments are used to pass information. +//! +//! ### From LHS to RHS +//! +//! When checking a LHS with `check_binders`, we produce (and use) an environment for binders, +//! namely `Binders`. This is a mapping from binder name to information about that binder: the span +//! of the binder for error messages and the stack of Kleene operators under which it was bound in +//! the LHS. +//! +//! This environment is used by both the LHS and RHS. The LHS uses it to detect duplicate binders. +//! The RHS uses it to detect the other errors. +//! +//! ### From outer macro to inner macro +//! +//! When checking the RHS of an outer macro and we detect a nested macro definition, we push the +//! current state, namely `MacroState`, to an environment of nested macro definitions. Each state +//! stores the LHS binders when entering the macro definition as well as the stack of Kleene +//! operators under which the inner macro is defined in the RHS. +//! +//! This environment is a stack representing the nesting of macro definitions. As such, the stack of +//! Kleene operators under which a meta-variable is repeating is the concatenation of the stacks +//! stored when entering a macro definition starting from the state in which the meta-variable is +//! bound. +use crate::ast::NodeId; +use crate::early_buffered_lints::BufferedEarlyLintId; +use crate::ext::tt::quoted::{KleeneToken, TokenTree}; +use crate::parse::token::TokenKind; +use crate::parse::token::{DelimToken, Token}; +use crate::parse::ParseSess; +use crate::symbol::{kw, sym}; + +use rustc_data_structures::fx::FxHashMap; +use smallvec::SmallVec; +use syntax_pos::{symbol::Ident, MultiSpan, Span}; + +/// Stack represented as linked list. +/// +/// Those are used for environments because they grow incrementally and are not mutable. +enum Stack<'a, T> { + /// Empty stack. + Empty, + /// A non-empty stack. + Push { + /// The top element. + top: T, + /// The previous elements. + prev: &'a Stack<'a, T>, + }, +} + +impl<'a, T> Stack<'a, T> { + /// Returns whether a stack is empty. + fn is_empty(&self) -> bool { + match *self { + Stack::Empty => true, + _ => false, + } + } + + /// Returns a new stack with an element of top. 
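    ///
    /// A minimal usage sketch (the integer values are illustrative, not from this module):
    ///
    /// ```ignore
    /// let empty = Stack::Empty;
    /// let one = empty.push(1); // stack: [1]
    /// let two = one.push(2);   // stack: [2, 1]; `one` is unchanged and still usable
    /// ```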
+    fn push(&'a self, top: T) -> Stack<'a, T> {
+        Stack::Push { top, prev: self }
+    }
+}
+
+impl<'a, T> Iterator for &'a Stack<'a, T> {
+    type Item = &'a T;
+
+    // Iterates from top to bottom of the stack.
+    fn next(&mut self) -> Option<&'a T> {
+        match *self {
+            Stack::Empty => None,
+            Stack::Push { ref top, ref prev } => {
+                *self = prev;
+                Some(top)
+            }
+        }
+    }
+}
+
+impl From<&Stack<'_, KleeneToken>> for SmallVec<[KleeneToken; 1]> {
+    fn from(ops: &Stack<'_, KleeneToken>) -> SmallVec<[KleeneToken; 1]> {
+        let mut ops: SmallVec<[KleeneToken; 1]> = ops.cloned().collect();
+        // The stack is innermost on top. We want outermost first.
+        ops.reverse();
+        ops
+    }
+}
+
+/// Information attached to a meta-variable binder in LHS.
+struct BinderInfo {
+    /// The span of the meta-variable in LHS.
+    span: Span,
+    /// The stack of Kleene operators (outermost first).
+    ops: SmallVec<[KleeneToken; 1]>,
+}
+
+/// An environment of meta-variables to their binder information.
+type Binders = FxHashMap<Ident, BinderInfo>;
+
+/// The state at which we entered a macro definition in the RHS of another macro definition.
+struct MacroState<'a> {
+    /// The binders of the branch where we entered the macro definition.
+    binders: &'a Binders,
+    /// The stack of Kleene operators (outermost first) where we entered the macro definition.
+    ops: SmallVec<[KleeneToken; 1]>,
+}
+
+/// Checks that meta-variables are used correctly in a macro definition.
+///
+/// Arguments:
+/// - `sess` is used to emit diagnostics and lints
+/// - `node_id` is used to emit lints
+/// - `span` is used when no spans are available
+/// - `lhses` and `rhses` should have the same length and represent the macro definition
+crate fn check_meta_variables(
+    sess: &ParseSess,
+    node_id: NodeId,
+    span: Span,
+    lhses: &[TokenTree],
+    rhses: &[TokenTree],
+) -> bool {
+    if lhses.len() != rhses.len() {
+        sess.span_diagnostic.span_bug(span, "length mismatch between LHSes and RHSes")
+    }
+    let mut valid = true;
+    for (lhs, rhs) in lhses.iter().zip(rhses.iter()) {
+        let mut binders = Binders::default();
+        check_binders(sess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut valid);
+        check_occurrences(sess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut valid);
+    }
+    valid
+}
+
+/// Checks `lhs` as part of the LHS of a macro definition, extends `binders` with new binders, and
+/// sets `valid` to false in case of errors.
+///
+/// Arguments:
+/// - `sess` is used to emit diagnostics and lints
+/// - `node_id` is used to emit lints
+/// - `lhs` is checked as part of a LHS
+/// - `macros` is the stack of possible outer macros
+/// - `binders` contains the binders of the LHS
+/// - `ops` is the stack of Kleene operators from the LHS
+/// - `valid` is set in case of errors
+fn check_binders(
+    sess: &ParseSess,
+    node_id: NodeId,
+    lhs: &TokenTree,
+    macros: &Stack<'_, MacroState<'_>>,
+    binders: &mut Binders,
+    ops: &Stack<'_, KleeneToken>,
+    valid: &mut bool,
+) {
+    match *lhs {
+        TokenTree::Token(..) => {}
+        // This can only happen when checking a nested macro because this LHS is then in the RHS of
+        // the outer macro. See ui/macros/macro-of-higher-order.rs where $y:$fragment in the
+        // LHS of the nested macro (and RHS of the outer macro) is parsed as MetaVar(y) Colon
+        // MetaVar(fragment) and not as MetaVarDecl(y, fragment).
+        TokenTree::MetaVar(span, name) => {
+            if macros.is_empty() {
+                sess.span_diagnostic.span_bug(span, "unexpected MetaVar in lhs");
+            }
+            // There are 3 possibilities:
+            if let Some(prev_info) = binders.get(&name) {
+                // 1.
The meta-variable is already bound in the current LHS: This is an error. + let mut span = MultiSpan::from_span(span); + span.push_span_label(prev_info.span, "previous declaration".into()); + buffer_lint(sess, span, node_id, "duplicate matcher binding"); + } else if get_binder_info(macros, binders, name).is_none() { + // 2. The meta-variable is free: This is a binder. + binders.insert(name, BinderInfo { span, ops: ops.into() }); + } else { + // 3. The meta-variable is bound: This is an occurrence. + check_occurrences(sess, node_id, lhs, macros, binders, ops, valid); + } + } + // Similarly, this can only happen when checking a toplevel macro. + TokenTree::MetaVarDecl(span, name, _kind) => { + if !macros.is_empty() { + sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in nested lhs"); + } + if let Some(prev_info) = get_binder_info(macros, binders, name) { + // Duplicate binders at the top-level macro definition are errors. The lint is only + // for nested macro definitions. + sess.span_diagnostic + .struct_span_err(span, "duplicate matcher binding") + .span_note(prev_info.span, "previous declaration was here") + .emit(); + *valid = false; + } else { + binders.insert(name, BinderInfo { span, ops: ops.into() }); + } + } + TokenTree::Delimited(_, ref del) => { + for tt in &del.tts { + check_binders(sess, node_id, tt, macros, binders, ops, valid); + } + } + TokenTree::Sequence(_, ref seq) => { + let ops = ops.push(seq.kleene); + for tt in &seq.tts { + check_binders(sess, node_id, tt, macros, binders, &ops, valid); + } + } + } +} + +/// Returns the binder information of a meta-variable. +/// +/// Arguments: +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the current binders +/// - `name` is the name of the meta-variable we are looking for +fn get_binder_info<'a>( + mut macros: &'a Stack<'a, MacroState<'a>>, + binders: &'a Binders, + name: Ident, +) -> Option<&'a BinderInfo> { + binders.get(&name).or_else(|| macros.find_map(|state| state.binders.get(&name))) +} + +/// Checks `rhs` as part of the RHS of a macro definition and sets `valid` to false in case of +/// errors. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `rhs` is checked as part of a RHS +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the binders of the associated LHS +/// - `ops` is the stack of Kleene operators from the RHS +/// - `valid` is set in case of errors +fn check_occurrences( + sess: &ParseSess, + node_id: NodeId, + rhs: &TokenTree, + macros: &Stack<'_, MacroState<'_>>, + binders: &Binders, + ops: &Stack<'_, KleeneToken>, + valid: &mut bool, +) { + match *rhs { + TokenTree::Token(..) => {} + TokenTree::MetaVarDecl(span, _name, _kind) => { + sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in rhs") + } + TokenTree::MetaVar(span, name) => { + check_ops_is_prefix(sess, node_id, macros, binders, ops, span, name); + } + TokenTree::Delimited(_, ref del) => { + check_nested_occurrences(sess, node_id, &del.tts, macros, binders, ops, valid); + } + TokenTree::Sequence(_, ref seq) => { + let ops = ops.push(seq.kleene); + check_nested_occurrences(sess, node_id, &seq.tts, macros, binders, &ops, valid); + } + } +} + +/// Represents the processed prefix of a nested macro. +#[derive(Clone, Copy, PartialEq, Eq)] +enum NestedMacroState { + /// Nothing that matches a nested macro definition was processed yet. + Empty, + /// The token `macro_rules` was processed. 
+ MacroRules, + /// The tokens `macro_rules!` were processed. + MacroRulesNot, + /// The tokens `macro_rules!` followed by a name were processed. The name may be either directly + /// an identifier or a meta-variable (that hopefully would be instantiated by an identifier). + MacroRulesNotName, + /// The keyword `macro` was processed. + Macro, + /// The keyword `macro` followed by a name was processed. + MacroName, + /// The keyword `macro` followed by a name and a token delimited by parentheses was processed. + MacroNameParen, +} + +/// Checks `tts` as part of the RHS of a macro definition, tries to recognize nested macro +/// definitions, and sets `valid` to false in case of errors. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `tts` is checked as part of a RHS and may contain macro definitions +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the binders of the associated LHS +/// - `ops` is the stack of Kleene operators from the RHS +/// - `valid` is set in case of errors +fn check_nested_occurrences( + sess: &ParseSess, + node_id: NodeId, + tts: &[TokenTree], + macros: &Stack<'_, MacroState<'_>>, + binders: &Binders, + ops: &Stack<'_, KleeneToken>, + valid: &mut bool, +) { + let mut state = NestedMacroState::Empty; + let nested_macros = macros.push(MacroState { binders, ops: ops.into() }); + let mut nested_binders = Binders::default(); + for tt in tts { + match (state, tt) { + ( + NestedMacroState::Empty, + &TokenTree::Token(Token { kind: TokenKind::Ident(name, false), .. }), + ) => { + if name == sym::macro_rules { + state = NestedMacroState::MacroRules; + } else if name == kw::Macro { + state = NestedMacroState::Macro; + } + } + ( + NestedMacroState::MacroRules, + &TokenTree::Token(Token { kind: TokenKind::Not, .. }), + ) => { + state = NestedMacroState::MacroRulesNot; + } + ( + NestedMacroState::MacroRulesNot, + &TokenTree::Token(Token { kind: TokenKind::Ident(..), .. }), + ) => { + state = NestedMacroState::MacroRulesNotName; + } + (NestedMacroState::MacroRulesNot, &TokenTree::MetaVar(..)) => { + state = NestedMacroState::MacroRulesNotName; + // We check that the meta-variable is correctly used. + check_occurrences(sess, node_id, tt, macros, binders, ops, valid); + } + (NestedMacroState::MacroRulesNotName, &TokenTree::Delimited(_, ref del)) + | (NestedMacroState::MacroName, &TokenTree::Delimited(_, ref del)) + if del.delim == DelimToken::Brace => + { + let legacy = state == NestedMacroState::MacroRulesNotName; + state = NestedMacroState::Empty; + let rest = + check_nested_macro(sess, node_id, legacy, &del.tts, &nested_macros, valid); + // If we did not check the whole macro definition, then check the rest as if outside + // the macro definition. + check_nested_occurrences( + sess, + node_id, + &del.tts[rest..], + macros, + binders, + ops, + valid, + ); + } + ( + NestedMacroState::Macro, + &TokenTree::Token(Token { kind: TokenKind::Ident(..), .. }), + ) => { + state = NestedMacroState::MacroName; + } + (NestedMacroState::Macro, &TokenTree::MetaVar(..)) => { + state = NestedMacroState::MacroName; + // We check that the meta-variable is correctly used. 
+ check_occurrences(sess, node_id, tt, macros, binders, ops, valid); + } + (NestedMacroState::MacroName, &TokenTree::Delimited(_, ref del)) + if del.delim == DelimToken::Paren => + { + state = NestedMacroState::MacroNameParen; + nested_binders = Binders::default(); + check_binders( + sess, + node_id, + tt, + &nested_macros, + &mut nested_binders, + &Stack::Empty, + valid, + ); + } + (NestedMacroState::MacroNameParen, &TokenTree::Delimited(_, ref del)) + if del.delim == DelimToken::Brace => + { + state = NestedMacroState::Empty; + check_occurrences( + sess, + node_id, + tt, + &nested_macros, + &nested_binders, + &Stack::Empty, + valid, + ); + } + (_, ref tt) => { + state = NestedMacroState::Empty; + check_occurrences(sess, node_id, tt, macros, binders, ops, valid); + } + } + } +} + +/// Checks the body of nested macro, returns where the check stopped, and sets `valid` to false in +/// case of errors. +/// +/// The token trees are checked as long as they look like a list of (LHS) => {RHS} token trees. This +/// check is a best-effort to detect a macro definition. It returns the position in `tts` where we +/// stopped checking because we detected we were not in a macro definition anymore. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `legacy` specifies whether the macro is legacy +/// - `tts` is checked as a list of (LHS) => {RHS} +/// - `macros` is the stack of outer macros +/// - `valid` is set in case of errors +fn check_nested_macro( + sess: &ParseSess, + node_id: NodeId, + legacy: bool, + tts: &[TokenTree], + macros: &Stack<'_, MacroState<'_>>, + valid: &mut bool, +) -> usize { + let n = tts.len(); + let mut i = 0; + let separator = if legacy { TokenKind::Semi } else { TokenKind::Comma }; + loop { + // We expect 3 token trees: `(LHS) => {RHS}`. The separator is checked after. + if i + 2 >= n + || !tts[i].is_delimited() + || !tts[i + 1].is_token(&TokenKind::FatArrow) + || !tts[i + 2].is_delimited() + { + break; + } + let lhs = &tts[i]; + let rhs = &tts[i + 2]; + let mut binders = Binders::default(); + check_binders(sess, node_id, lhs, macros, &mut binders, &Stack::Empty, valid); + check_occurrences(sess, node_id, rhs, macros, &binders, &Stack::Empty, valid); + // Since the last semicolon is optional for legacy macros and decl_macro are not terminated, + // we increment our checked position by how many token trees we already checked (the 3 + // above) before checking for the separator. + i += 3; + if i == n || !tts[i].is_token(&separator) { + break; + } + // We increment our checked position for the semicolon. + i += 1; + } + i +} + +/// Checks that a meta-variable occurrence is valid. +/// +/// Arguments: +/// - `sess` is used to emit diagnostics and lints +/// - `node_id` is used to emit lints +/// - `macros` is the stack of possible outer macros +/// - `binders` contains the binders of the associated LHS +/// - `ops` is the stack of Kleene operators from the RHS +/// - `span` is the span of the meta-variable to check +/// - `name` is the name of the meta-variable to check +fn check_ops_is_prefix( + sess: &ParseSess, + node_id: NodeId, + macros: &Stack<'_, MacroState<'_>>, + binders: &Binders, + ops: &Stack<'_, KleeneToken>, + span: Span, + name: Ident, +) { + let macros = macros.push(MacroState { binders, ops: ops.into() }); + // Accumulates the stacks the operators of each state until (and including when) the + // meta-variable is found. The innermost stack is first. 
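    // As a rough sketch (the operators here are invented for this comment): if the occurrence
    // sits under `$( ... )+` inside a nested macro that is itself defined under `$( ... )*` in
    // the outer macro, `acc` ends up holding, innermost first, the stack `["+"]` from the nested
    // state followed by `["*"]` from the outer one.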
+    let mut acc: SmallVec<[&SmallVec<[KleeneToken; 1]>; 1]> = SmallVec::new();
+    for state in &macros {
+        acc.push(&state.ops);
+        if let Some(binder) = state.binders.get(&name) {
+            // This variable concatenates the stack of operators from the RHS of the LHS where the
+            // meta-variable was defined to where it is used (in possibly nested macros). The
+            // outermost operator is first.
+            let mut occurrence_ops: SmallVec<[KleeneToken; 2]> = SmallVec::new();
+            // We need to iterate from the end to start with the outermost stack.
+            for ops in acc.iter().rev() {
+                occurrence_ops.extend_from_slice(ops);
+            }
+            ops_is_prefix(sess, node_id, span, name, &binder.ops, &occurrence_ops);
+            return;
+        }
+    }
+    buffer_lint(sess, span.into(), node_id, &format!("unknown macro variable `{}`", name));
+}
+
+/// Returns whether `binder_ops` is a prefix of `occurrence_ops`.
+///
+/// The stack of Kleene operators of a meta-variable occurrence just needs to have the stack of
+/// Kleene operators of its binder as a prefix.
+///
+/// Consider $i in the following example:
+///
+///     ( $( $i:ident = $($j:ident),+ );* ) => { $($( $i += $j; )+)* }
+///
+/// It occurs under the Kleene stack ["*", "+"] and is bound under ["*"] only.
+///
+/// Arguments:
+/// - `sess` is used to emit diagnostics and lints
+/// - `node_id` is used to emit lints
+/// - `span` is the span of the meta-variable being checked
+/// - `name` is the name of the meta-variable being checked
+/// - `binder_ops` is the stack of Kleene operators for the binder
+/// - `occurrence_ops` is the stack of Kleene operators for the occurrence
+fn ops_is_prefix(
+    sess: &ParseSess,
+    node_id: NodeId,
+    span: Span,
+    name: Ident,
+    binder_ops: &[KleeneToken],
+    occurrence_ops: &[KleeneToken],
+) {
+    for (i, binder) in binder_ops.iter().enumerate() {
+        if i >= occurrence_ops.len() {
+            let mut span = MultiSpan::from_span(span);
+            span.push_span_label(binder.span, "expected repetition".into());
+            let message = &format!("variable '{}' is still repeating at this depth", name);
+            buffer_lint(sess, span, node_id, message);
+            return;
+        }
+        let occurrence = &occurrence_ops[i];
+        if occurrence.op != binder.op {
+            let mut span = MultiSpan::from_span(span);
+            span.push_span_label(binder.span, "expected repetition".into());
+            span.push_span_label(occurrence.span, "conflicting repetition".into());
+            let message = "meta-variable repeats with different Kleene operator";
+            buffer_lint(sess, span, node_id, message);
+            return;
+        }
+    }
+}
+
+fn buffer_lint(sess: &ParseSess, span: MultiSpan, node_id: NodeId, message: &str) {
+    sess.buffer_lint(BufferedEarlyLintId::MetaVariableMisuse, span, node_id, message);
+}
diff --git a/src/libsyntax/ext/mbe/macro_parser.rs b/src/libsyntax/ext/mbe/macro_parser.rs
new file mode 100644
index 00000000000..a34a0344f27
--- /dev/null
+++ b/src/libsyntax/ext/mbe/macro_parser.rs
@@ -0,0 +1,952 @@
+//! This is an NFA-based parser, which calls out to the main rust parser for named non-terminals
+//! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads
+//! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
+//! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier
+//! fit for Macro-by-Example-style rules.
+//!
+//! (In order to prevent the pathological case, we'd need to lazily construct the resulting
+//! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old
+//! items, but it would also save overhead)
+//!
+//! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate. +//! The macro parser restricts itself to the features of finite state automata. Earley parsers +//! can be described as an extension of NFAs with completion rules, prediction rules, and recursion. +//! +//! Quick intro to how the parser works: +//! +//! A 'position' is a dot in the middle of a matcher, usually represented as a +//! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. +//! +//! The parser walks through the input a character at a time, maintaining a list +//! of threads consistent with the current position in the input string: `cur_items`. +//! +//! As it processes them, it fills up `eof_items` with threads that would be valid if +//! the macro invocation is now over, `bb_items` with threads that are waiting on +//! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting +//! on a particular token. Most of the logic concerns moving the · through the +//! repetitions indicated by Kleene stars. The rules for moving the · without +//! consuming any input are called epsilon transitions. It only advances or calls +//! out to the real Rust parser when no `cur_items` threads remain. +//! +//! Example: +//! +//! ```text, ignore +//! Start parsing a a a a b against [· a $( a )* a b]. +//! +//! Remaining input: a a a a b +//! next: [· a $( a )* a b] +//! +//! - - - Advance over an a. - - - +//! +//! Remaining input: a a a b +//! cur: [a · $( a )* a b] +//! Descend/Skip (first item). +//! next: [a $( · a )* a b] [a $( a )* · a b]. +//! +//! - - - Advance over an a. - - - +//! +//! Remaining input: a a b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an a. - - - (this looks exactly like the last step) +//! +//! Remaining input: a b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over an a. - - - (this looks exactly like the last step) +//! +//! Remaining input: b +//! cur: [a $( a · )* a b] [a $( a )* a · b] +//! Follow epsilon transition: Finish/Repeat (first item) +//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] +//! +//! - - - Advance over a b. - - - +//! +//! Remaining input: '' +//! eof: [a $( a )* a b ·] +//! ``` + +crate use NamedMatch::*; +crate use ParseResult::*; +use TokenTreeOrTokenTreeSlice::*; + +use crate::ast::{Ident, Name}; +use crate::ext::tt::quoted::{self, TokenTree}; +use crate::parse::{Directory, ParseSess}; +use crate::parse::parser::{Parser, PathStyle}; +use crate::parse::token::{self, DocComment, Nonterminal, Token}; +use crate::print::pprust; +use crate::symbol::{kw, sym, Symbol}; +use crate::tokenstream::{DelimSpan, TokenStream}; + +use errors::FatalError; +use smallvec::{smallvec, SmallVec}; +use syntax_pos::Span; + +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::sync::Lrc; +use std::collections::hash_map::Entry::{Occupied, Vacant}; +use std::mem; +use std::ops::{Deref, DerefMut}; + +// To avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body. + +/// Either a sequence of token trees or a single one. This is used as the representation of the +/// sequence of tokens that make up a matcher. 
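+///
+/// Roughly speaking, `TtSeq` holds a whole top-level matcher slice (this is what `parse` starts
+/// from), while `Tt` holds a single subtree, such as a delimited group or a sequence, that the
+/// "dot" has descended into.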
+#[derive(Clone)]
+enum TokenTreeOrTokenTreeSlice<'tt> {
+    Tt(TokenTree),
+    TtSeq(&'tt [TokenTree]),
+}
+
+impl<'tt> TokenTreeOrTokenTreeSlice<'tt> {
+    /// Returns the number of constituent top-level token trees of `self` (top-level in that it
+    /// will not recursively descend into subtrees).
+    fn len(&self) -> usize {
+        match *self {
+            TtSeq(ref v) => v.len(),
+            Tt(ref tt) => tt.len(),
+        }
+    }
+
+    /// The `index`-th token tree of `self`.
+    fn get_tt(&self, index: usize) -> TokenTree {
+        match *self {
+            TtSeq(ref v) => v[index].clone(),
+            Tt(ref tt) => tt.get_tt(index),
+        }
+    }
+}
+
+/// An unzipping of `TokenTree`s... see the `stack` field of `MatcherPos`.
+///
+/// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have
+/// descended into.
+#[derive(Clone)]
+struct MatcherTtFrame<'tt> {
+    /// The "parent" matcher that we are descending into.
+    elts: TokenTreeOrTokenTreeSlice<'tt>,
+    /// The position of the "dot" in `elts` at the time we descended.
+    idx: usize,
+}
+
+type NamedMatchVec = SmallVec<[NamedMatch; 4]>;
+
+/// Represents a single "position" (aka "matcher position", aka "item"), as
+/// described in the module documentation.
+///
+/// Here:
+///
+/// - `'root` represents the lifetime of the stack slot that holds the root
+///   `MatcherPos`. As described in `MatcherPosHandle`, the root `MatcherPos`
+///   structure is stored on the stack, but subsequent instances are put into
+///   the heap.
+/// - `'tt` represents the lifetime of the token trees that this matcher
+///   position refers to.
+///
+/// It is important to distinguish these two lifetimes because we have a
+/// `SmallVec<[MatcherTtFrame<'tt>; 1]>` below, and the destructor of
+/// that is considered to possibly access the data from its elements (it lacks
+/// a `#[may_dangle]` attribute). As a result, the compiler needs to know that
+/// all the elements in that `SmallVec` strictly outlive the root stack slot
+/// lifetime. By separating `'tt` from `'root`, we can show that.
+#[derive(Clone)]
+struct MatcherPos<'root, 'tt> {
+    /// The token or sequence of tokens that make up the matcher
+    top_elts: TokenTreeOrTokenTreeSlice<'tt>,
+
+    /// The position of the "dot" in this matcher
+    idx: usize,
+
+    /// The first span of source that the beginning of this matcher corresponds to. In other
+    /// words, the token in the source whose span is `sp_open` is matched against the first token of
+    /// the matcher.
+    sp_open: Span,
+
+    /// For each named metavar in the matcher, we keep track of token trees matched against the
+    /// metavar by the black box parser. In particular, there may be more than one match per
+    /// metavar if we are in a repetition (each repetition matches each of the variables).
+    /// Moreover, matchers and repetitions can be nested; the `matches` field is shared (hence the
+    /// `Rc`) among all "nested" matchers. `match_lo`, `match_cur`, and `match_hi` keep track of
+    /// the current position of the `self` matcher position in the shared `matches` list.
+    ///
+    /// Also, note that while we are descending into a sequence, matchers are given their own
+    /// `matches` vector. Only once we reach the end of a full repetition of the sequence do we add
+    /// all bound matches from the submatcher into the shared top-level `matches` vector. If `sep`
+    /// and `up` are `Some`, then `matches` is _not_ the shared top-level list. Instead, if one
+    /// wants the shared `matches`, one should use `up.matches`.
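+    ///
+    /// As a rough sketch: while matching `a a b` against the matcher `$( $x:tt )* b`, the
+    /// sequence submatcher accumulates the two `a` tokens in its own `matches`; only when the
+    /// repetition finishes is that vector folded into the parent's shared list as a single
+    /// `MatchedSeq`.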
+    matches: Box<[Lrc<NamedMatchVec>]>,
+    /// The position in `matches` corresponding to the first metavar in this matcher's sequence of
+    /// token trees. In other words, the first metavar in the first token of `top_elts` corresponds
+    /// to `matches[match_lo]`.
+    match_lo: usize,
+    /// The position in `matches` corresponding to the metavar we are currently trying to match
+    /// against the source token stream. `match_lo <= match_cur <= match_hi`.
+    match_cur: usize,
+    /// Similar to `match_lo` except `match_hi` is the position in `matches` of the _last_ metavar
+    /// in this matcher.
+    match_hi: usize,
+
+    // The following fields are used if we are matching a repetition. If we aren't, they should be
+    // `None`.
+
+    /// The KleeneOp of this sequence if we are in a repetition.
+    seq_op: Option<quoted::KleeneOp>,
+
+    /// The separator if we are in a repetition.
+    sep: Option<Token>,
+
+    /// The "parent" matcher position if we are in a repetition. That is, the matcher position just
+    /// before we enter the sequence.
+    up: Option<MatcherPosHandle<'root, 'tt>>,
+
+    /// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from
+    /// a delimited token tree (e.g., something wrapped in `(` `)`) or to get the contents of a doc
+    /// comment...
+    ///
+    /// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. )
+    /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
+    /// that where the bottom of the stack is the outermost matcher.
+    /// Also, throughout the comments, this "descent" is often referred to as "unzipping"...
+    stack: SmallVec<[MatcherTtFrame<'tt>; 1]>,
+}
+
+impl<'root, 'tt> MatcherPos<'root, 'tt> {
+    /// Adds `m` as a named match for the `idx`-th metavar.
+    fn push_match(&mut self, idx: usize, m: NamedMatch) {
+        let matches = Lrc::make_mut(&mut self.matches[idx]);
+        matches.push(m);
+    }
+}
+
+// Lots of MatcherPos instances are created at runtime. Allocating them on the
+// heap is slow. Furthermore, using SmallVec<MatcherPos> to allocate them all
+// on the stack is also slow, because MatcherPos is quite a large type and
+// instances get moved around a lot between vectors, which requires lots of
+// slow memcpy calls.
+//
+// Therefore, the initial MatcherPos is always allocated on the stack,
+// subsequent ones (of which there aren't that many) are allocated on the heap,
+// and this type is used to encapsulate both cases.
+enum MatcherPosHandle<'root, 'tt> {
+    Ref(&'root mut MatcherPos<'root, 'tt>),
+    Box(Box<MatcherPos<'root, 'tt>>),
+}
+
+impl<'root, 'tt> Clone for MatcherPosHandle<'root, 'tt> {
+    // This always produces a new Box.
+    fn clone(&self) -> Self {
+        MatcherPosHandle::Box(match *self {
+            MatcherPosHandle::Ref(ref r) => Box::new((**r).clone()),
+            MatcherPosHandle::Box(ref b) => b.clone(),
+        })
+    }
+}
+
+impl<'root, 'tt> Deref for MatcherPosHandle<'root, 'tt> {
+    type Target = MatcherPos<'root, 'tt>;
+    fn deref(&self) -> &Self::Target {
+        match *self {
+            MatcherPosHandle::Ref(ref r) => r,
+            MatcherPosHandle::Box(ref b) => b,
+        }
+    }
+}
+
+impl<'root, 'tt> DerefMut for MatcherPosHandle<'root, 'tt> {
+    fn deref_mut(&mut self) -> &mut MatcherPos<'root, 'tt> {
+        match *self {
+            MatcherPosHandle::Ref(ref mut r) => r,
+            MatcherPosHandle::Box(ref mut b) => b,
+        }
+    }
+}
+
+/// Represents the possible results of an attempted parse.
+crate enum ParseResult<T> {
+    /// Parsed successfully.
+    Success(T),
+    /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected
+    /// end of macro invocation. Otherwise, it indicates that no rules expected the given token.
+    Failure(Token, &'static str),
+    /// Fatal error (malformed macro?). Abort compilation.
+    Error(syntax_pos::Span, String),
+}
+
+/// A `ParseResult` where the `Success` variant contains a mapping of `Ident`s to `NamedMatch`es.
+/// This represents the mapping of metavars to the token trees they bind to.
+crate type NamedParseResult = ParseResult<FxHashMap<Ident, NamedMatch>>;
+
+/// Count how many metavars are named in the given matcher `ms`.
+crate fn count_names(ms: &[TokenTree]) -> usize {
+    ms.iter().fold(0, |count, elt| {
+        count + match *elt {
+            TokenTree::Sequence(_, ref seq) => seq.num_captures,
+            TokenTree::Delimited(_, ref delim) => count_names(&delim.tts),
+            TokenTree::MetaVar(..) => 0,
+            TokenTree::MetaVarDecl(..) => 1,
+            TokenTree::Token(..) => 0,
+        }
+    })
+}
+
+/// `len` `Vec`s (initially shared and empty) that will store matches of metavars.
+fn create_matches(len: usize) -> Box<[Lrc<NamedMatchVec>]> {
+    if len == 0 {
+        vec![]
+    } else {
+        let empty_matches = Lrc::new(SmallVec::new());
+        vec![empty_matches; len]
+    }.into_boxed_slice()
+}
+
+/// Generates the top-level matcher position in which the "dot" is before the first token of the
+/// matcher `ms` and we are going to start matching at the span `open` in the source.
+fn initial_matcher_pos<'root, 'tt>(ms: &'tt [TokenTree], open: Span) -> MatcherPos<'root, 'tt> {
+    let match_idx_hi = count_names(ms);
+    let matches = create_matches(match_idx_hi);
+    MatcherPos {
+        // Start with the top level matcher given to us
+        top_elts: TtSeq(ms), // "elts" is an abbr. for "elements"
+        // The "dot" is before the first token of the matcher
+        idx: 0,
+        // We start matching at the span `open` in the source code
+        sp_open: open,
+
+        // Initialize `matches` to a bunch of empty `Vec`s -- one for each metavar in `top_elts`.
+        // `match_lo` for `top_elts` is 0 and `match_hi` is `matches.len()`. `match_cur` is 0 since
+        // we haven't actually matched anything yet.
+        matches,
+        match_lo: 0,
+        match_cur: 0,
+        match_hi: match_idx_hi,
+
+        // Haven't descended into any delimiters, so empty stack
+        stack: smallvec![],
+
+        // Haven't descended into any sequences, so both of these are `None`.
+        seq_op: None,
+        sep: None,
+        up: None,
+    }
+}
+
+/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
+/// so it is associated with a single ident in a parse, and all
+/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
+/// (expr, item, etc). Each leaf in a single `NamedMatch` corresponds to a
+/// single `token::MATCH_NONTERMINAL` in the `TokenTree` that produced it.
+///
+/// The in-memory structure of a particular `NamedMatch` represents the match
+/// that occurred when a particular subset of a matcher was applied to a
+/// particular token tree.
+///
+/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
+/// the `MatchedNonterminal`s, will depend on the token tree it was applied
+/// to: each `MatchedSeq` corresponds to a single `TTSeq` in the originating
+/// token tree. The depth of the `NamedMatch` structure will therefore depend
+/// only on the nesting depth of `ast::TTSeq`s in the originating
+/// token tree it was derived from.
+#[derive(Debug, Clone)]
+crate enum NamedMatch {
+    MatchedSeq(Lrc<NamedMatchVec>, DelimSpan),
+    MatchedNonterminal(Lrc<Nonterminal>),
+}
+
+/// Takes a sequence of token trees `ms` representing a matcher which successfully matched input
+/// and an iterator of items that matched input and produces a `NamedParseResult`.
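+///
+/// For example, after the matcher `$a:ident $( $b:tt )*` matches successfully, the returned map
+/// binds `a` to a `MatchedNonterminal` and `b` to a `MatchedSeq` covering everything the
+/// repetition consumed; a sketch of the shape (not literal debug output):
+///
+/// ```text, ignore
+/// { a => MatchedNonterminal(<ident>), b => MatchedSeq([<tt>, <tt>, ...]) }
+/// ```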
+fn nameize<I: Iterator<Item = NamedMatch>>(
+    sess: &ParseSess,
+    ms: &[TokenTree],
+    mut res: I,
+) -> NamedParseResult {
+    // Recursively descend into each type of matcher (e.g., sequences, delimited, metavars) and make
+    // sure that each metavar has _exactly one_ binding. If a metavar does not have exactly one
+    // binding, then there is an error. If it does, then we insert the binding into the
+    // `NamedParseResult`.
+    fn n_rec<I: Iterator<Item = NamedMatch>>(
+        sess: &ParseSess,
+        m: &TokenTree,
+        res: &mut I,
+        ret_val: &mut FxHashMap<Ident, NamedMatch>,
+    ) -> Result<(), (syntax_pos::Span, String)> {
+        match *m {
+            TokenTree::Sequence(_, ref seq) => for next_m in &seq.tts {
+                n_rec(sess, next_m, res.by_ref(), ret_val)?
+            },
+            TokenTree::Delimited(_, ref delim) => for next_m in &delim.tts {
+                n_rec(sess, next_m, res.by_ref(), ret_val)?;
+            },
+            TokenTree::MetaVarDecl(span, _, id) if id.name == kw::Invalid => {
+                if sess.missing_fragment_specifiers.borrow_mut().remove(&span) {
+                    return Err((span, "missing fragment specifier".to_string()));
+                }
+            }
+            TokenTree::MetaVarDecl(sp, bind_name, _) => {
+                match ret_val.entry(bind_name) {
+                    Vacant(spot) => {
+                        spot.insert(res.next().unwrap());
+                    }
+                    Occupied(..) => {
+                        return Err((sp, format!("duplicated bind name: {}", bind_name)))
+                    }
+                }
+            }
+            TokenTree::MetaVar(..) | TokenTree::Token(..) => (),
+        }
+
+        Ok(())
+    }
+
+    let mut ret_val = FxHashMap::default();
+    for m in ms {
+        match n_rec(sess, m, res.by_ref(), &mut ret_val) {
+            Ok(_) => {}
+            Err((sp, msg)) => return Error(sp, msg),
+        }
+    }
+
+    Success(ret_val)
+}
+
+/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
+/// other tokens, this is "unexpected token...".
+crate fn parse_failure_msg(tok: &Token) -> String {
+    match tok.kind {
+        token::Eof => "unexpected end of macro invocation".to_string(),
+        _ => format!(
+            "no rules expected the token `{}`",
+            pprust::token_to_string(tok)
+        ),
+    }
+}
+
+/// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison)
+fn token_name_eq(t1: &Token, t2: &Token) -> bool {
+    if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) {
+        ident1.name == ident2.name && is_raw1 == is_raw2
+    } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) {
+        ident1.name == ident2.name
+    } else {
+        t1.kind == t2.kind
+    }
+}
+
+/// Process the matcher positions of `cur_items` until it is empty. In the process, this will
+/// produce more items in `next_items`, `eof_items`, and `bb_items`.
+///
+/// For more info about how this happens, see the module-level doc comments and the inline
+/// comments of this function.
+///
+/// # Parameters
+///
+/// - `sess`: the parsing session into which errors are emitted.
+/// - `cur_items`: the set of current items to be processed. This should be empty by the end of a
+///   successful execution of this function.
+/// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in
+///   the function `parse`.
+/// - `eof_items`: the set of items that would be valid if this was the EOF.
+/// - `bb_items`: the set of items that are waiting for the black-box parser.
+/// - `token`: the current token of the parser.
+/// - `span`: the `Span` in the source code corresponding to the token trees we are trying to match
+///   against the matcher positions in `cur_items`.
+///
+/// # Returns
+///
+/// A `ParseResult`. Note that matches are kept track of through the items generated.
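+///
+/// For example, in the module-level notation, with the matcher `a $( a )* b` and the dot just
+/// before the sequence, this loop splits one item into two successors: a thread that descends
+/// into the repetition (`a $( · a )* b`) and a thread that skips it (`a $( a )* · b`). A thread
+/// whose dot sits before a literal token lands in `next_items`; one whose dot sits before
+/// something like `$e:expr` lands in `bb_items`.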
+fn inner_parse_loop<'root, 'tt>(
+    sess: &ParseSess,
+    cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
+    next_items: &mut Vec<MatcherPosHandle<'root, 'tt>>,
+    eof_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
+    bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
+    token: &Token,
+) -> ParseResult<()> {
+    // Pop items from `cur_items` until it is empty.
+    while let Some(mut item) = cur_items.pop() {
+        // When unzipped trees end, remove them. This corresponds to backtracking out of a
+        // delimited submatcher into which we already descended. In backtracking out again, we need
+        // to advance the "dot" past the delimiters in the outer matcher.
+        while item.idx >= item.top_elts.len() {
+            match item.stack.pop() {
+                Some(MatcherTtFrame { elts, idx }) => {
+                    item.top_elts = elts;
+                    item.idx = idx + 1;
+                }
+                None => break,
+            }
+        }
+
+        // Get the current position of the "dot" (`idx`) in `item` and the number of token trees in
+        // the matcher (`len`).
+        let idx = item.idx;
+        let len = item.top_elts.len();
+
+        // If `idx >= len`, then we are at or past the end of the matcher of `item`.
+        if idx >= len {
+            // We are repeating iff there is a parent. If the matcher is inside of a repetition,
+            // then we could be at the end of a sequence or at the beginning of the next
+            // repetition.
+            if item.up.is_some() {
+                // At this point, regardless of whether there is a separator, we should add all
+                // matches from the complete repetition of the sequence to the shared, top-level
+                // `matches` list (actually, `up.matches`, which could itself not be the top-level,
+                // but anyway...). Moreover, we add another item to `cur_items` in which the "dot"
+                // is at the end of the `up` matcher. This ensures that the "dot" in the `up`
+                // matcher is also advanced sufficiently.
+                //
+                // NOTE: removing the condition `idx == len` allows trailing separators.
+                if idx == len {
+                    // Get the `up` matcher
+                    let mut new_pos = item.up.clone().unwrap();
+
+                    // Add matches from this repetition to the `matches` of `up`
+                    for idx in item.match_lo..item.match_hi {
+                        let sub = item.matches[idx].clone();
+                        let span = DelimSpan::from_pair(item.sp_open, token.span);
+                        new_pos.push_match(idx, MatchedSeq(sub, span));
+                    }
+
+                    // Move the "dot" past the repetition in `up`
+                    new_pos.match_cur = item.match_hi;
+                    new_pos.idx += 1;
+                    cur_items.push(new_pos);
+                }
+
+                // Check if we need a separator.
+                if idx == len && item.sep.is_some() {
+                    // We have a separator, and it is the current token. We can advance past the
+                    // separator token.
+                    if item.sep
+                        .as_ref()
+                        .map(|sep| token_name_eq(token, sep))
+                        .unwrap_or(false)
+                    {
+                        item.idx += 1;
+                        next_items.push(item);
+                    }
+                }
+                // We don't need a separator. Move the "dot" back to the beginning of the matcher
+                // and try to match again UNLESS we are only allowed to have _one_ repetition.
+                else if item.seq_op != Some(quoted::KleeneOp::ZeroOrOne) {
+                    item.match_cur = item.match_lo;
+                    item.idx = 0;
+                    cur_items.push(item);
+                }
+            }
+            // If we are not in a repetition, then being at the end of a matcher means that we have
+            // reached the potential end of the input.
+            else {
+                eof_items.push(item);
+            }
+        }
+        // We are in the middle of a matcher.
+        else {
+            // Look at what token in the matcher we are trying to match the current token (`token`)
+            // against. Depending on that, we may generate new items.
+            match item.top_elts.get_tt(idx) {
+                // Need to descend into a sequence
+                TokenTree::Sequence(sp, seq) => {
+                    // Examine the case where there are 0 matches of this sequence.
We are + // implicitly disallowing OneOrMore from having 0 matches here. Thus, that will + // result in a "no rules expected token" error by virtue of this matcher not + // working. + if seq.kleene.op == quoted::KleeneOp::ZeroOrMore + || seq.kleene.op == quoted::KleeneOp::ZeroOrOne + { + let mut new_item = item.clone(); + new_item.match_cur += seq.num_captures; + new_item.idx += 1; + for idx in item.match_cur..item.match_cur + seq.num_captures { + new_item.push_match(idx, MatchedSeq(Lrc::new(smallvec![]), sp)); + } + cur_items.push(new_item); + } + + let matches = create_matches(item.matches.len()); + cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos { + stack: smallvec![], + sep: seq.separator.clone(), + seq_op: Some(seq.kleene.op), + idx: 0, + matches, + match_lo: item.match_cur, + match_cur: item.match_cur, + match_hi: item.match_cur + seq.num_captures, + up: Some(item), + sp_open: sp.open, + top_elts: Tt(TokenTree::Sequence(sp, seq)), + }))); + } + + // We need to match a metavar (but the identifier is invalid)... this is an error + TokenTree::MetaVarDecl(span, _, id) if id.name == kw::Invalid => { + if sess.missing_fragment_specifiers.borrow_mut().remove(&span) { + return Error(span, "missing fragment specifier".to_string()); + } + } + + // We need to match a metavar with a valid ident... call out to the black-box + // parser by adding an item to `bb_items`. + TokenTree::MetaVarDecl(_, _, id) => { + // Built-in nonterminals never start with these tokens, + // so we can eliminate them from consideration. + if may_begin_with(token, id.name) { + bb_items.push(item); + } + } + + // We need to descend into a delimited submatcher or a doc comment. To do this, we + // push the current matcher onto a stack and push a new item containing the + // submatcher onto `cur_items`. + // + // At the beginning of the loop, if we reach the end of the delimited submatcher, + // we pop the stack to backtrack out of the descent. + seq @ TokenTree::Delimited(..) | + seq @ TokenTree::Token(Token { kind: DocComment(..), .. }) => { + let lower_elts = mem::replace(&mut item.top_elts, Tt(seq)); + let idx = item.idx; + item.stack.push(MatcherTtFrame { + elts: lower_elts, + idx, + }); + item.idx = 0; + cur_items.push(item); + } + + // We just matched a normal token. We can just advance the parser. + TokenTree::Token(t) if token_name_eq(&t, token) => { + item.idx += 1; + next_items.push(item); + } + + // There was another token that was not `token`... This means we can't add any + // rules. NOTE that this is not necessarily an error unless _all_ items in + // `cur_items` end up doing this. There may still be some other matchers that do + // end up working out. + TokenTree::Token(..) | TokenTree::MetaVar(..) => {} + } + } + } + + // Yay a successful parse (so far)! + Success(()) +} + +/// Use the given sequence of token trees (`ms`) as a matcher. Match the given token stream `tts` +/// against it and return the match. 
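+///
+/// A hypothetical call, only to sketch the shape of the API (the session, input stream, and
+/// matcher are assumed to be supplied by the caller):
+///
+/// ```text, ignore
+/// match parse(sess, input, &matcher, None, true) {
+///     Success(bindings) => { /* metavar ident -> NamedMatch */ }
+///     Failure(token, msg) => { /* no arm matched; report `msg` at `token.span` */ }
+///     Error(span, msg) => { /* malformed macro; abort */ }
+/// }
+/// ```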
+///
+/// # Parameters
+///
+/// - `sess`: The session into which errors are emitted
+/// - `tts`: The tokenstream we are matching against the pattern `ms`
+/// - `ms`: A sequence of token trees representing a pattern against which we are matching
+/// - `directory`: Information about the file locations (needed for the black-box parser)
+/// - `recurse_into_modules`: Whether or not to recurse into modules (needed for the black-box
+///   parser)
+crate fn parse(
+    sess: &ParseSess,
+    tts: TokenStream,
+    ms: &[TokenTree],
+    directory: Option<Directory<'_>>,
+    recurse_into_modules: bool,
+) -> NamedParseResult {
+    // Create a parser that can be used for the "black box" parts.
+    let mut parser = Parser::new(
+        sess,
+        tts,
+        directory,
+        recurse_into_modules,
+        true,
+        crate::MACRO_ARGUMENTS,
+    );
+
+    // A queue of possible matcher positions. We initialize it with the matcher position in which
+    // the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then
+    // processes all of these possible matcher positions and produces possible next positions into
+    // `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items`
+    // and we start over again.
+    //
+    // This MatcherPos instance is allocated on the stack. All others -- and
+    // there are frequently *no* others! -- are allocated on the heap.
+    let mut initial = initial_matcher_pos(ms, parser.token.span);
+    let mut cur_items = smallvec![MatcherPosHandle::Ref(&mut initial)];
+    let mut next_items = Vec::new();
+
+    loop {
+        // Matcher positions black-box parsed by parser.rs (`parser`)
+        let mut bb_items = SmallVec::new();
+
+        // Matcher positions that would be valid if the macro invocation was over now
+        let mut eof_items = SmallVec::new();
+        assert!(next_items.is_empty());
+
+        // Process `cur_items` until either we have finished the input or we need to get some
+        // parsing from the black-box parser done. The result is that `next_items` will contain a
+        // bunch of possible next matcher positions.
+        match inner_parse_loop(
+            sess,
+            &mut cur_items,
+            &mut next_items,
+            &mut eof_items,
+            &mut bb_items,
+            &parser.token,
+        ) {
+            Success(_) => {}
+            Failure(token, msg) => return Failure(token, msg),
+            Error(sp, msg) => return Error(sp, msg),
+        }
+
+        // inner parse loop handled all cur_items, so it's empty
+        assert!(cur_items.is_empty());
+
+        // We need to do some post processing after the `inner_parse_loop`.
+        //
+        // Error messages here could be improved with links to original rules.
+
+        // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise,
+        // either the parse is ambiguous (which should never happen) or there is a syntax error.
+        if parser.token == token::Eof {
+            if eof_items.len() == 1 {
+                let matches = eof_items[0]
+                    .matches
+                    .iter_mut()
+                    .map(|dv| Lrc::make_mut(dv).pop().unwrap());
+                return nameize(sess, ms, matches);
+            } else if eof_items.len() > 1 {
+                return Error(
+                    parser.token.span,
+                    "ambiguity: multiple successful parses".to_string(),
+                );
+            } else {
+                return Failure(
+                    Token::new(token::Eof, if parser.token.span.is_dummy() {
+                        parser.token.span
+                    } else {
+                        sess.source_map().next_point(parser.token.span)
+                    }),
+                    "missing tokens in macro arguments",
+                );
+            }
+        }
+        // Performance hack: eof_items may share matchers via Rc with other things that we want
+        // to modify. Dropping eof_items now may drop these refcounts to 1, preventing an
+        // unnecessary implicit clone later in Rc::make_mut.
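+        // (`Lrc::make_mut` clones the inner value only when the reference count is greater than
+        // one, so dropping these extra references early keeps the later calls cheap.)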
+        drop(eof_items);
+
+        // Another possibility is that we need to call out to the black-box parser to parse some
+        // Rust nonterminal. However, if there is not EXACTLY ONE of these, something is wrong.
+        if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
+            let nts = bb_items
+                .iter()
+                .map(|item| match item.top_elts.get_tt(item.idx) {
+                    TokenTree::MetaVarDecl(_, bind, name) => format!("{} ('{}')", name, bind),
+                    _ => panic!(),
+                })
+                .collect::<Vec<String>>()
+                .join(" or ");
+
+            return Error(
+                parser.token.span,
+                format!(
+                    "local ambiguity: multiple parsing options: {}",
+                    match next_items.len() {
+                        0 => format!("built-in NTs {}.", nts),
+                        1 => format!("built-in NTs {} or 1 other option.", nts),
+                        n => format!("built-in NTs {} or {} other options.", nts, n),
+                    }
+                ),
+            );
+        }
+        // If there are no possible next positions AND we aren't waiting for the black-box parser,
+        // then there is a syntax error.
+        else if bb_items.is_empty() && next_items.is_empty() {
+            return Failure(
+                parser.token.take(),
+                "no rules expected this token in macro call",
+            );
+        }
+        // Dump all possible `next_items` into `cur_items` for the next iteration.
+        else if !next_items.is_empty() {
+            // Now process the next token
+            cur_items.extend(next_items.drain(..));
+            parser.bump();
+        }
+        // Finally, we have the case where we need to call the black-box parser to get some
+        // nonterminal.
+        else {
+            assert_eq!(bb_items.len(), 1);
+
+            let mut item = bb_items.pop().unwrap();
+            if let TokenTree::MetaVarDecl(span, _, ident) = item.top_elts.get_tt(item.idx) {
+                let match_cur = item.match_cur;
+                item.push_match(
+                    match_cur,
+                    MatchedNonterminal(Lrc::new(parse_nt(&mut parser, span, ident.name))),
+                );
+                item.idx += 1;
+                item.match_cur += 1;
+            } else {
+                unreachable!()
+            }
+            cur_items.push(item);
+        }
+
+        assert!(!cur_items.is_empty());
+    }
+}
+
+/// The token is an identifier, but not `_`.
+/// We prohibit passing `_` to macros expecting `ident` for now.
+fn get_macro_name(token: &Token) -> Option<(Name, bool)> {
+    match token.kind {
+        token::Ident(name, is_raw) if name != kw::Underscore => Some((name, is_raw)),
+        _ => None,
+    }
+}
+
+/// Checks whether a non-terminal may begin with a particular token.
+///
+/// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that
+/// token. Be conservative (return true) if not sure.
+fn may_begin_with(token: &Token, name: Name) -> bool {
+    /// Checks whether the non-terminal may contain a single (non-keyword) identifier.
+    fn may_be_ident(nt: &token::Nonterminal) -> bool {
+        match *nt {
+            token::NtItem(_) | token::NtBlock(_) | token::NtVis(_) => false,
+            _ => true,
+        }
+    }
+
+    match name {
+        sym::expr => token.can_begin_expr()
+            // This exception is here for backwards compatibility.
+            && !token.is_keyword(kw::Let),
+        sym::ty => token.can_begin_type(),
+        sym::ident => get_macro_name(token).is_some(),
+        sym::literal => token.can_begin_literal_or_bool(),
+        sym::vis => match token.kind {
+            // The follow-set of :vis + "priv" keyword + interpolated
+            token::Comma | token::Ident(..) | token::Interpolated(_) => true,
+            _ => token.can_begin_type(),
+        },
+        sym::block => match token.kind {
+            token::OpenDelim(token::Brace) => true,
+            token::Interpolated(ref nt) => match **nt {
+                token::NtItem(_)
+                | token::NtPat(_)
+                | token::NtTy(_)
+                | token::NtIdent(..)
+                | token::NtMeta(_)
+                | token::NtPath(_)
+                | token::NtVis(_) => false, // none of these may start with '{'.
+ _ => true, + }, + _ => false, + }, + sym::path | sym::meta => match token.kind { + token::ModSep | token::Ident(..) => true, + token::Interpolated(ref nt) => match **nt { + token::NtPath(_) | token::NtMeta(_) => true, + _ => may_be_ident(&nt), + }, + _ => false, + }, + sym::pat => match token.kind { + token::Ident(..) | // box, ref, mut, and other identifiers (can stricten) + token::OpenDelim(token::Paren) | // tuple pattern + token::OpenDelim(token::Bracket) | // slice pattern + token::BinOp(token::And) | // reference + token::BinOp(token::Minus) | // negative literal + token::AndAnd | // double reference + token::Literal(..) | // literal + token::DotDot | // range pattern (future compat) + token::DotDotDot | // range pattern (future compat) + token::ModSep | // path + token::Lt | // path (UFCS constant) + token::BinOp(token::Shl) => true, // path (double UFCS) + token::Interpolated(ref nt) => may_be_ident(nt), + _ => false, + }, + sym::lifetime => match token.kind { + token::Lifetime(_) => true, + token::Interpolated(ref nt) => match **nt { + token::NtLifetime(_) | token::NtTT(_) => true, + _ => false, + }, + _ => false, + }, + _ => match token.kind { + token::CloseDelim(_) => false, + _ => true, + }, + } +} + +/// A call to the "black-box" parser to parse some Rust non-terminal. +/// +/// # Parameters +/// +/// - `p`: the "black-box" parser to use +/// - `sp`: the `Span` we want to parse +/// - `name`: the name of the metavar _matcher_ we want to match (e.g., `tt`, `ident`, `block`, +/// etc...) +/// +/// # Returns +/// +/// The parsed non-terminal. +fn parse_nt(p: &mut Parser<'_>, sp: Span, name: Symbol) -> Nonterminal { + if name == sym::tt { + return token::NtTT(p.parse_token_tree()); + } + // check at the beginning and the parser checks after each bump + p.process_potential_macro_variable(); + match name { + sym::item => match panictry!(p.parse_item()) { + Some(i) => token::NtItem(i), + None => { + p.fatal("expected an item keyword").emit(); + FatalError.raise(); + } + }, + sym::block => token::NtBlock(panictry!(p.parse_block())), + sym::stmt => match panictry!(p.parse_stmt()) { + Some(s) => token::NtStmt(s), + None => { + p.fatal("expected a statement").emit(); + FatalError.raise(); + } + }, + sym::pat => token::NtPat(panictry!(p.parse_pat(None))), + sym::expr => token::NtExpr(panictry!(p.parse_expr())), + sym::literal => token::NtLiteral(panictry!(p.parse_literal_maybe_minus())), + sym::ty => token::NtTy(panictry!(p.parse_ty())), + // this could be handled like a token, since it is one + sym::ident => if let Some((name, is_raw)) = get_macro_name(&p.token) { + let span = p.token.span; + p.bump(); + token::NtIdent(Ident::new(name, span), is_raw) + } else { + let token_str = pprust::token_to_string(&p.token); + p.fatal(&format!("expected ident, found {}", &token_str)).emit(); + FatalError.raise() + } + sym::path => token::NtPath(panictry!(p.parse_path(PathStyle::Type))), + sym::meta => token::NtMeta(panictry!(p.parse_meta_item())), + sym::vis => token::NtVis(panictry!(p.parse_visibility(true))), + sym::lifetime => if p.check_lifetime() { + token::NtLifetime(p.expect_lifetime().ident) + } else { + let token_str = pprust::token_to_string(&p.token); + p.fatal(&format!("expected a lifetime, found `{}`", &token_str)).emit(); + FatalError.raise(); + } + // this is not supposed to happen, since it has been checked + // when compiling the macro. 
+ _ => p.span_bug(sp, "invalid fragment specifier"),
+ }
+}
diff --git a/src/libsyntax/ext/mbe/macro_rules.rs b/src/libsyntax/ext/mbe/macro_rules.rs
new file mode 100644
index 00000000000..90dfa6e7ac8
--- /dev/null
+++ b/src/libsyntax/ext/mbe/macro_rules.rs
@@ -0,0 +1,1173 @@
+use crate::ast;
+use crate::attr::{self, TransparencyError};
+use crate::edition::Edition;
+use crate::ext::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander};
+use crate::ext::base::{SyntaxExtension, SyntaxExtensionKind};
+use crate::ext::expand::{AstFragment, AstFragmentKind};
+use crate::ext::tt::macro_check;
+use crate::ext::tt::macro_parser::{parse, parse_failure_msg};
+use crate::ext::tt::macro_parser::{Error, Failure, Success};
+use crate::ext::tt::macro_parser::{MatchedNonterminal, MatchedSeq};
+use crate::ext::tt::quoted;
+use crate::ext::tt::transcribe::transcribe;
+use crate::feature_gate::Features;
+use crate::parse::parser::Parser;
+use crate::parse::token::TokenKind::*;
+use crate::parse::token::{self, NtTT, Token};
+use crate::parse::{Directory, ParseSess};
+use crate::symbol::{kw, sym, Symbol};
+use crate::tokenstream::{DelimSpan, TokenStream, TokenTree};
+
+use errors::{DiagnosticBuilder, FatalError};
+use log::debug;
+use syntax_pos::hygiene::Transparency;
+use syntax_pos::Span;
+
+use rustc_data_structures::fx::FxHashMap;
+use std::borrow::Cow;
+use std::collections::hash_map::Entry;
+use std::slice;
+
+use errors::Applicability;
+use rustc_data_structures::sync::Lrc;
+
+const VALID_FRAGMENT_NAMES_MSG: &str = "valid fragment specifiers are \
+ `ident`, `block`, `stmt`, `expr`, `pat`, `ty`, `lifetime`, \
+ `literal`, `path`, `meta`, `tt`, `item` and `vis`";
+
+crate struct ParserAnyMacro<'a> {
+ parser: Parser<'a>,
+
+ /// Span of the expansion site of the macro this parser is for
+ site_span: Span,
+ /// The ident of the macro we're parsing
+ macro_ident: ast::Ident,
+ arm_span: Span,
+}
+
+crate fn annotate_err_with_kind(
+ err: &mut DiagnosticBuilder<'_>,
+ kind: AstFragmentKind,
+ span: Span,
+) {
+ match kind {
+ AstFragmentKind::Ty => {
+ err.span_label(span, "this macro call doesn't expand to a type");
+ }
+ AstFragmentKind::Pat => {
+ err.span_label(span, "this macro call doesn't expand to a pattern");
+ }
+ _ => {}
+ };
+}
+
+impl<'a> ParserAnyMacro<'a> {
+ crate fn make(mut self: Box<ParserAnyMacro<'a>>, kind: AstFragmentKind) -> AstFragment {
+ let ParserAnyMacro { site_span, macro_ident, ref mut parser, arm_span } = *self;
+ let fragment = panictry!(parser.parse_ast_fragment(kind, true).map_err(|mut e| {
+ if parser.token == token::Eof && e.message().ends_with(", found ``") {
+ if !e.span.is_dummy() {
+ // early end of macro arm (#52866)
+ e.replace_span_with(parser.sess.source_map().next_point(parser.token.span));
+ }
+ let msg = &e.message[0];
+ e.message[0] = (
+ format!(
+ "macro expansion ends with an incomplete expression: {}",
+ msg.0.replace(", found ``", ""),
+ ),
+ msg.1,
+ );
+ }
+ if e.span.is_dummy() {
+ // Get around lack of span in error (#30128)
+ e.replace_span_with(site_span);
+ if parser.sess.source_map().span_to_filename(arm_span).is_real() {
+ e.span_label(arm_span, "in this macro arm");
+ }
+ } else if !parser.sess.source_map().span_to_filename(parser.token.span).is_real() {
+ e.span_label(site_span, "in this macro invocation");
+ }
+ match kind {
+ AstFragmentKind::Pat if macro_ident.name == sym::vec => {
+ let mut suggestion = None;
+ if let Ok(code) = parser.sess.source_map().span_to_snippet(site_span) {
+ if let Some(bang) = code.find('!') {
+ suggestion = Some(code[bang +
1..].to_string());
+ }
+ }
+ if let Some(suggestion) = suggestion {
+ e.span_suggestion(
+ site_span,
+ "use a slice pattern here instead",
+ suggestion,
+ Applicability::MachineApplicable,
+ );
+ } else {
+ e.span_label(
+ site_span,
+ "use a slice pattern here instead",
+ );
+ }
+ e.help("for more information, see https://doc.rust-lang.org/edition-guide/\
+ rust-2018/slice-patterns.html");
+ }
+ _ => annotate_err_with_kind(&mut e, kind, site_span),
+ };
+ e
+ }));
+
+ // We allow semicolons at the end of expressions -- e.g., the semicolon in
+ // `macro_rules! m { () => { panic!(); } }` isn't parsed by `.parse_expr()`,
+ // but `m!()` is allowed in expression positions (cf. issue #34706).
+ if kind == AstFragmentKind::Expr && parser.token == token::Semi {
+ parser.bump();
+ }
+
+ // Make sure we don't have any tokens left to parse so we don't silently drop anything.
+ let path = ast::Path::from_ident(macro_ident.with_span_pos(site_span));
+ parser.ensure_complete_parse(&path, kind.name(), site_span);
+ fragment
+ }
+}
+
+struct MacroRulesMacroExpander {
+ name: ast::Ident,
+ span: Span,
+ transparency: Transparency,
+ lhses: Vec<quoted::TokenTree>,
+ rhses: Vec<quoted::TokenTree>,
+ valid: bool,
+}
+
+impl TTMacroExpander for MacroRulesMacroExpander {
+ fn expand<'cx>(
+ &self,
+ cx: &'cx mut ExtCtxt<'_>,
+ sp: Span,
+ input: TokenStream,
+ ) -> Box<dyn MacResult + 'cx> {
+ if !self.valid {
+ return DummyResult::any(sp);
+ }
+ generic_extension(
+ cx, sp, self.span, self.name, self.transparency, input, &self.lhses, &self.rhses
+ )
+ }
+}
+
+fn trace_macros_note(cx: &mut ExtCtxt<'_>, sp: Span, message: String) {
+ let sp = sp.macro_backtrace().last().map(|trace| trace.call_site).unwrap_or(sp);
+ cx.expansions.entry(sp).or_default().push(message);
+}
+
+/// Given `lhses` and `rhses`, this is the new macro we create
+fn generic_extension<'cx>(
+ cx: &'cx mut ExtCtxt<'_>,
+ sp: Span,
+ def_span: Span,
+ name: ast::Ident,
+ transparency: Transparency,
+ arg: TokenStream,
+ lhses: &[quoted::TokenTree],
+ rhses: &[quoted::TokenTree],
+) -> Box<dyn MacResult + 'cx> {
+ if cx.trace_macros() {
+ trace_macros_note(cx, sp, format!("expanding `{}! {{ {} }}`", name, arg));
+ }
+
+ // Which arm's failure should we report? (the one furthest along)
+ let mut best_failure: Option<(Token, &str)> = None;
+
+ for (i, lhs) in lhses.iter().enumerate() {
+ // try each arm's matchers
+ let lhs_tt = match *lhs {
+ quoted::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
+ _ => cx.span_bug(sp, "malformed macro lhs"),
+ };
+
+ match TokenTree::parse(cx, lhs_tt, arg.clone()) {
+ Success(named_matches) => {
+ let rhs = match rhses[i] {
+ // ignore delimiters
+ quoted::TokenTree::Delimited(_, ref delimed) => delimed.tts.clone(),
+ _ => cx.span_bug(sp, "malformed macro rhs"),
+ };
+ let arm_span = rhses[i].span();
+
+ let rhs_spans = rhs.iter().map(|t| t.span()).collect::<Vec<_>>();
+ // rhs has holes ( `$id` and `$(...)` that need filled)
+ let mut tts = transcribe(cx, &named_matches, rhs, transparency);
+
+ // Replace all the tokens for the corresponding positions in the macro, to maintain
+ // proper positions in error reporting, while maintaining the macro_backtrace.
+ if rhs_spans.len() == tts.len() { + tts = tts.map_enumerated(|i, mut tt| { + let mut sp = rhs_spans[i]; + sp = sp.with_ctxt(tt.span().ctxt()); + tt.set_span(sp); + tt + }); + } + + if cx.trace_macros() { + trace_macros_note(cx, sp, format!("to `{}`", tts)); + } + + let directory = Directory { + path: Cow::from(cx.current_expansion.module.directory.as_path()), + ownership: cx.current_expansion.directory_ownership, + }; + let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false, None); + p.root_module_name = + cx.current_expansion.module.mod_path.last().map(|id| id.as_str().to_string()); + p.last_type_ascription = cx.current_expansion.prior_type_ascription; + + p.process_potential_macro_variable(); + // Let the context choose how to interpret the result. + // Weird, but useful for X-macros. + return Box::new(ParserAnyMacro { + parser: p, + + // Pass along the original expansion site and the name of the macro + // so we can print a useful error message if the parse of the expanded + // macro leaves unparsed tokens. + site_span: sp, + macro_ident: name, + arm_span, + }); + } + Failure(token, msg) => match best_failure { + Some((ref best_token, _)) if best_token.span.lo() >= token.span.lo() => {} + _ => best_failure = Some((token, msg)), + }, + Error(err_sp, ref msg) => cx.span_fatal(err_sp.substitute_dummy(sp), &msg[..]), + } + } + + let (token, label) = best_failure.expect("ran no matchers"); + let span = token.span.substitute_dummy(sp); + let mut err = cx.struct_span_err(span, &parse_failure_msg(&token)); + err.span_label(span, label); + if !def_span.is_dummy() && cx.source_map().span_to_filename(def_span).is_real() { + err.span_label(cx.source_map().def_span(def_span), "when calling this macro"); + } + + // Check whether there's a missing comma in this macro call, like `println!("{}" a);` + if let Some((arg, comma_span)) = arg.add_comma() { + for lhs in lhses { + // try each arm's matchers + let lhs_tt = match *lhs { + quoted::TokenTree::Delimited(_, ref delim) => &delim.tts[..], + _ => continue, + }; + match TokenTree::parse(cx, lhs_tt, arg.clone()) { + Success(_) => { + if comma_span.is_dummy() { + err.note("you might be missing a comma"); + } else { + err.span_suggestion_short( + comma_span, + "missing comma here", + ", ".to_string(), + Applicability::MachineApplicable, + ); + } + } + _ => {} + } + } + } + err.emit(); + cx.trace_macros_diag(); + DummyResult::any(sp) +} + +// Note that macro-by-example's input is also matched against a token tree: +// $( $lhs:tt => $rhs:tt );+ +// +// Holy self-referential! + +/// Converts a macro item into a syntax extension. +pub fn compile_declarative_macro( + sess: &ParseSess, + features: &Features, + def: &ast::Item, + edition: Edition, +) -> SyntaxExtension { + let diag = &sess.span_diagnostic; + let lhs_nm = ast::Ident::new(sym::lhs, def.span); + let rhs_nm = ast::Ident::new(sym::rhs, def.span); + let tt_spec = ast::Ident::new(sym::tt, def.span); + + // Parse the macro_rules! invocation + let body = match def.node { + ast::ItemKind::MacroDef(ref body) => body, + _ => unreachable!(), + }; + + // The pattern that macro_rules matches. + // The grammar for macro_rules! is: + // $( $lhs:tt => $rhs:tt );+ + // ...quasiquoting this would be nice. 
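+ // As an illustration (not part of the original comment), a definition like
+ //
+ // macro_rules! add_one { ($e:expr) => { $e + 1 }; }
+ //
+ // is matched by this grammar with `$lhs` bound to the token tree `($e:expr)`
+ // and `$rhs` bound to `{ $e + 1 }`.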
+ // These spans won't matter, anyways
+ let argument_gram = vec![
+ quoted::TokenTree::Sequence(
+ DelimSpan::dummy(),
+ Lrc::new(quoted::SequenceRepetition {
+ tts: vec![
+ quoted::TokenTree::MetaVarDecl(def.span, lhs_nm, tt_spec),
+ quoted::TokenTree::token(token::FatArrow, def.span),
+ quoted::TokenTree::MetaVarDecl(def.span, rhs_nm, tt_spec),
+ ],
+ separator: Some(Token::new(
+ if body.legacy { token::Semi } else { token::Comma },
+ def.span,
+ )),
+ kleene: quoted::KleeneToken::new(quoted::KleeneOp::OneOrMore, def.span),
+ num_captures: 2,
+ }),
+ ),
+ // to phase into semicolon-termination instead of semicolon-separation
+ quoted::TokenTree::Sequence(
+ DelimSpan::dummy(),
+ Lrc::new(quoted::SequenceRepetition {
+ tts: vec![quoted::TokenTree::token(
+ if body.legacy { token::Semi } else { token::Comma },
+ def.span,
+ )],
+ separator: None,
+ kleene: quoted::KleeneToken::new(quoted::KleeneOp::ZeroOrMore, def.span),
+ num_captures: 0,
+ }),
+ ),
+ ];
+
+ let argument_map = match parse(sess, body.stream(), &argument_gram, None, true) {
+ Success(m) => m,
+ Failure(token, msg) => {
+ let s = parse_failure_msg(&token);
+ let sp = token.span.substitute_dummy(def.span);
+ let mut err = sess.span_diagnostic.struct_span_fatal(sp, &s);
+ err.span_label(sp, msg);
+ err.emit();
+ FatalError.raise();
+ }
+ Error(sp, s) => {
+ sess.span_diagnostic.span_fatal(sp.substitute_dummy(def.span), &s).raise();
+ }
+ };
+
+ let mut valid = true;
+
+ // Extract the arguments:
+ let lhses = match argument_map[&lhs_nm] {
+ MatchedSeq(ref s, _) => s
+ .iter()
+ .map(|m| {
+ if let MatchedNonterminal(ref nt) = *m {
+ if let NtTT(ref tt) = **nt {
+ let tt = quoted::parse(
+ tt.clone().into(),
+ true,
+ sess,
+ features,
+ &def.attrs,
+ edition,
+ def.id,
+ )
+ .pop()
+ .unwrap();
+ valid &= check_lhs_nt_follows(sess, features, &def.attrs, &tt);
+ return tt;
+ }
+ }
+ sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
+ })
+ .collect::<Vec<quoted::TokenTree>>(),
+ _ => sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs"),
+ };
+
+ let rhses = match argument_map[&rhs_nm] {
+ MatchedSeq(ref s, _) => s
+ .iter()
+ .map(|m| {
+ if let MatchedNonterminal(ref nt) = *m {
+ if let NtTT(ref tt) = **nt {
+ return quoted::parse(
+ tt.clone().into(),
+ false,
+ sess,
+ features,
+ &def.attrs,
+ edition,
+ def.id,
+ )
+ .pop()
+ .unwrap();
+ }
+ }
+ sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
+ })
+ .collect::<Vec<quoted::TokenTree>>(),
+ _ => sess.span_diagnostic.span_bug(def.span, "wrong-structured rhs"),
+ };
+
+ for rhs in &rhses {
+ valid &= check_rhs(sess, rhs);
+ }
+
+ // don't abort iteration early, so that errors for multiple lhses can be reported
+ for lhs in &lhses {
+ valid &= check_lhs_no_empty_seq(sess, slice::from_ref(lhs));
+ }
+
+ // We use CRATE_NODE_ID instead of `def.id` otherwise we may emit buffered lints for a node id
+ // that is not lint-checked and trigger the "failed to process buffered lint here" bug.
+ valid &= macro_check::check_meta_variables(sess, ast::CRATE_NODE_ID, def.span, &lhses, &rhses);
+
+ let (transparency, transparency_error) = attr::find_transparency(&def.attrs, body.legacy);
+ match transparency_error {
+ Some(TransparencyError::UnknownTransparency(value, span)) =>
+ diag.span_err(span, &format!("unknown macro transparency: `{}`", value)),
+ Some(TransparencyError::MultipleTransparencyAttrs(old_span, new_span)) =>
+ diag.span_err(vec![old_span, new_span], "multiple macro transparency attributes"),
+ None => {}
+ }
+
+ let expander: Box<_> = Box::new(MacroRulesMacroExpander {
+ name: def.ident, span: def.span, transparency, lhses, rhses, valid
+ });
+
+ SyntaxExtension::new(
+ sess,
+ SyntaxExtensionKind::LegacyBang(expander),
+ def.span,
+ Vec::new(),
+ edition,
+ def.ident.name,
+ &def.attrs,
+ )
+}
+
+fn check_lhs_nt_follows(
+ sess: &ParseSess,
+ features: &Features,
+ attrs: &[ast::Attribute],
+ lhs: &quoted::TokenTree,
+) -> bool {
+ // lhs is going to be like TokenTree::Delimited(...), where the
+ // entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens.
+ if let quoted::TokenTree::Delimited(_, ref tts) = *lhs {
+ check_matcher(sess, features, attrs, &tts.tts)
+ } else {
+ let msg = "invalid macro matcher; matchers must be contained in balanced delimiters";
+ sess.span_diagnostic.span_err(lhs.span(), msg);
+ false
+ }
+ // we don't abort on errors on rejection, the driver will do that for us
+ // after parsing/expansion. we can report every error in every macro this way.
+}
+
+/// Checks that the lhs contains no repetition which could match an empty token
+/// tree, because then the matcher would hang indefinitely.
+fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[quoted::TokenTree]) -> bool {
+ use quoted::TokenTree;
+ for tt in tts {
+ match *tt {
+ TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => (),
+ TokenTree::Delimited(_, ref del) => {
+ if !check_lhs_no_empty_seq(sess, &del.tts) {
+ return false;
+ }
+ }
+ TokenTree::Sequence(span, ref seq) => {
+ if seq.separator.is_none()
+ && seq.tts.iter().all(|seq_tt| match *seq_tt {
+ TokenTree::MetaVarDecl(_, _, id) => id.name == sym::vis,
+ TokenTree::Sequence(_, ref sub_seq) => {
+ sub_seq.kleene.op == quoted::KleeneOp::ZeroOrMore
+ || sub_seq.kleene.op == quoted::KleeneOp::ZeroOrOne
+ }
+ _ => false,
+ })
+ {
+ let sp = span.entire();
+ sess.span_diagnostic.span_err(sp, "repetition matches empty token tree");
+ return false;
+ }
+ if !check_lhs_no_empty_seq(sess, &seq.tts) {
+ return false;
+ }
+ }
+ }
+ }
+
+ true
+}
+
+fn check_rhs(sess: &ParseSess, rhs: &quoted::TokenTree) -> bool {
+ match *rhs {
+ quoted::TokenTree::Delimited(..) => return true,
+ _ => sess.span_diagnostic.span_err(rhs.span(), "macro rhs must be delimited"),
+ }
+ false
+}
+
+fn check_matcher(
+ sess: &ParseSess,
+ features: &Features,
+ attrs: &[ast::Attribute],
+ matcher: &[quoted::TokenTree],
+) -> bool {
+ let first_sets = FirstSets::new(matcher);
+ let empty_suffix = TokenSet::empty();
+ let err = sess.span_diagnostic.err_count();
+ check_matcher_core(sess, features, attrs, &first_sets, matcher, &empty_suffix);
+ err == sess.span_diagnostic.err_count()
+}
+
+// The `FirstSets` for a matcher is a mapping from subsequences in the
+// matcher to the FIRST set for that subsequence.
+//
+// This mapping is partially precomputed via a backwards scan over the
+// token trees of the matcher, which provides a mapping from each
+// repetition sequence to its *first* set.
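+//
+// For example (an illustration, not from the original comment): in the matcher
+// `$($a:expr b),* c`, the sequence `$($a:expr b),*` maps to the FIRST set
+// {`$a:expr`}; and because that repetition may occur zero times, the FIRST set
+// of the matcher as a whole is {`$a:expr`, `c`}.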
+//
+// (Hypothetically, sequences should be uniquely identifiable via their
+// spans, though perhaps that is false, e.g., for macro-generated macros
+// that do not try to inject artificial span information. My plan is
+// to try to catch such cases ahead of time and not include them in
+// the precomputed mapping.)
+struct FirstSets {
+ // this maps each TokenTree::Sequence `$(tt ...) SEP OP` that is uniquely identified by its
+ // span in the original matcher to the First set for the inner sequence `tt ...`.
+ //
+ // If two sequences have the same span in a matcher, then map that
+ // span to None (invalidating the mapping here and forcing the code to
+ // use a slow path).
+ first: FxHashMap<Span, Option<TokenSet>>,
+}
+
+impl FirstSets {
+ fn new(tts: &[quoted::TokenTree]) -> FirstSets {
+ use quoted::TokenTree;
+
+ let mut sets = FirstSets { first: FxHashMap::default() };
+ build_recur(&mut sets, tts);
+ return sets;
+
+ // walks backward over `tts`, returning the FIRST for `tts`
+ // and updating `sets` at the same time for all sequence
+ // substructure we find within `tts`.
+ fn build_recur(sets: &mut FirstSets, tts: &[TokenTree]) -> TokenSet {
+ let mut first = TokenSet::empty();
+ for tt in tts.iter().rev() {
+ match *tt {
+ TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => {
+ first.replace_with(tt.clone());
+ }
+ TokenTree::Delimited(span, ref delimited) => {
+ build_recur(sets, &delimited.tts[..]);
+ first.replace_with(delimited.open_tt(span.open));
+ }
+ TokenTree::Sequence(sp, ref seq_rep) => {
+ let subfirst = build_recur(sets, &seq_rep.tts[..]);
+
+ match sets.first.entry(sp.entire()) {
+ Entry::Vacant(vac) => {
+ vac.insert(Some(subfirst.clone()));
+ }
+ Entry::Occupied(mut occ) => {
+ // if there is already an entry, then a span must have collided.
+ // This should not happen with typical macro_rules macros,
+ // but syntax extensions need not maintain distinct spans,
+ // so distinct syntax trees can be assigned the same span.
+ // In such a case, the map cannot be trusted; so mark this
+ // entry as unusable.
+ occ.insert(None);
+ }
+ }
+
+ // If the sequence contents can be empty, then the first
+ // token could be the separator token itself.
+
+ if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) {
+ first.add_one_maybe(TokenTree::Token(sep.clone()));
+ }
+
+ // Reverse scan: Sequence comes before `first`.
+ if subfirst.maybe_empty
+ || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrMore
+ || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrOne
+ {
+ // If sequence is potentially empty, then
+ // union them (preserving first emptiness).
+ first.add_all(&TokenSet { maybe_empty: true, ..subfirst });
+ } else {
+ // Otherwise, sequence guaranteed
+ // non-empty; replace first.
+ first = subfirst;
+ }
+ }
+ }
+ }
+
+ first
+ }
+ }
+
+ // walks forward over `tts` until all potential FIRST tokens are
+ // identified.
+ fn first(&self, tts: &[quoted::TokenTree]) -> TokenSet {
+ use quoted::TokenTree;
+
+ let mut first = TokenSet::empty();
+ for tt in tts.iter() {
+ assert!(first.maybe_empty);
+ match *tt {
+ TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..)
=> {
+ first.add_one(tt.clone());
+ return first;
+ }
+ TokenTree::Delimited(span, ref delimited) => {
+ first.add_one(delimited.open_tt(span.open));
+ return first;
+ }
+ TokenTree::Sequence(sp, ref seq_rep) => {
+ let subfirst_owned;
+ let subfirst = match self.first.get(&sp.entire()) {
+ Some(&Some(ref subfirst)) => subfirst,
+ Some(&None) => {
+ subfirst_owned = self.first(&seq_rep.tts[..]);
+ &subfirst_owned
+ }
+ None => {
+ panic!("We missed a sequence during FirstSets construction");
+ }
+ };
+
+ // If the sequence contents can be empty, then the first
+ // token could be the separator token itself.
+ if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) {
+ first.add_one_maybe(TokenTree::Token(sep.clone()));
+ }
+
+ assert!(first.maybe_empty);
+ first.add_all(subfirst);
+ if subfirst.maybe_empty
+ || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrMore
+ || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrOne
+ {
+ // Continue scanning for more first
+ // tokens, but also make sure we
+ // restore empty-tracking state.
+ first.maybe_empty = true;
+ continue;
+ } else {
+ return first;
+ }
+ }
+ }
+ }
+
+ // we only exit the loop if `tts` was empty or if every
+ // element of `tts` matches the empty sequence.
+ assert!(first.maybe_empty);
+ first
+ }
+}
+
+// A set of `quoted::TokenTree`s, which may include `TokenTree::Match`s
+// (for macro-by-example syntactic variables). It also carries the
+// `maybe_empty` flag; that is true if and only if the matcher can
+// match an empty token sequence.
+//
+// The First set is computed on submatchers like `$($a:expr b),* $(c)* d`,
+// which has corresponding FIRST = {$a:expr, c, d}.
+// Likewise, `$($a:expr b),* $(c)+ d` has FIRST = {$a:expr, c}.
+//
+// (Notably, we must allow for *-op to occur zero times.)
+#[derive(Clone, Debug)]
+struct TokenSet {
+ tokens: Vec<quoted::TokenTree>,
+ maybe_empty: bool,
+}
+
+impl TokenSet {
+ // Returns a set for the empty sequence.
+ fn empty() -> Self {
+ TokenSet { tokens: Vec::new(), maybe_empty: true }
+ }
+
+ // Returns the set `{ tok }` for the single-token (and thus
+ // non-empty) sequence [tok].
+ fn singleton(tok: quoted::TokenTree) -> Self {
+ TokenSet { tokens: vec![tok], maybe_empty: false }
+ }
+
+ // Changes self to be the set `{ tok }`.
+ // Since `tok` is always present, marks self as non-empty.
+ fn replace_with(&mut self, tok: quoted::TokenTree) {
+ self.tokens.clear();
+ self.tokens.push(tok);
+ self.maybe_empty = false;
+ }
+
+ // Changes self to be the empty set `{}`; meant for use when
+ // the particular token does not matter, but we want to
+ // record that it occurs.
+ fn replace_with_irrelevant(&mut self) {
+ self.tokens.clear();
+ self.maybe_empty = false;
+ }
+
+ // Adds `tok` to the set for `self`, marking sequence as non-empty.
+ fn add_one(&mut self, tok: quoted::TokenTree) {
+ if !self.tokens.contains(&tok) {
+ self.tokens.push(tok);
+ }
+ self.maybe_empty = false;
+ }
+
+ // Adds `tok` to the set for `self`. (Leaves `maybe_empty` flag alone.)
+ fn add_one_maybe(&mut self, tok: quoted::TokenTree) {
+ if !self.tokens.contains(&tok) {
+ self.tokens.push(tok);
+ }
+ }
+
+ // Adds all elements of `other` to this.
+ //
+ // (Since this is a set, we filter out duplicates.)
+ //
+ // If `other` is potentially empty, then preserves the previous
+ // setting of the empty flag of `self`. If `other` is guaranteed
+ // non-empty, then `self` is marked non-empty.
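+ //
+ // For example (an illustration only): adding FIRST(`$(b)?`) = {`b`, maybe_empty}
+ // to the set {`a`} yields {`a`, `b`} and leaves the receiver's empty flag
+ // untouched, since the `?` repetition may match nothing.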
+ fn add_all(&mut self, other: &Self) { + for tok in &other.tokens { + if !self.tokens.contains(tok) { + self.tokens.push(tok.clone()); + } + } + if !other.maybe_empty { + self.maybe_empty = false; + } + } +} + +// Checks that `matcher` is internally consistent and that it +// can legally be followed by a token `N`, for all `N` in `follow`. +// (If `follow` is empty, then it imposes no constraint on +// the `matcher`.) +// +// Returns the set of NT tokens that could possibly come last in +// `matcher`. (If `matcher` matches the empty sequence, then +// `maybe_empty` will be set to true.) +// +// Requires that `first_sets` is pre-computed for `matcher`; +// see `FirstSets::new`. +fn check_matcher_core( + sess: &ParseSess, + features: &Features, + attrs: &[ast::Attribute], + first_sets: &FirstSets, + matcher: &[quoted::TokenTree], + follow: &TokenSet, +) -> TokenSet { + use quoted::TokenTree; + + let mut last = TokenSet::empty(); + + // 2. For each token and suffix [T, SUFFIX] in M: + // ensure that T can be followed by SUFFIX, and if SUFFIX may be empty, + // then ensure T can also be followed by any element of FOLLOW. + 'each_token: for i in 0..matcher.len() { + let token = &matcher[i]; + let suffix = &matcher[i + 1..]; + + let build_suffix_first = || { + let mut s = first_sets.first(suffix); + if s.maybe_empty { + s.add_all(follow); + } + s + }; + + // (we build `suffix_first` on demand below; you can tell + // which cases are supposed to fall through by looking for the + // initialization of this variable.) + let suffix_first; + + // First, update `last` so that it corresponds to the set + // of NT tokens that might end the sequence `... token`. + match *token { + TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => { + let can_be_followed_by_any; + if let Err(bad_frag) = has_legal_fragment_specifier(sess, features, attrs, token) { + let msg = format!("invalid fragment specifier `{}`", bad_frag); + sess.span_diagnostic + .struct_span_err(token.span(), &msg) + .help(VALID_FRAGMENT_NAMES_MSG) + .emit(); + // (This eliminates false positives and duplicates + // from error messages.) + can_be_followed_by_any = true; + } else { + can_be_followed_by_any = token_can_be_followed_by_any(token); + } + + if can_be_followed_by_any { + // don't need to track tokens that work with any, + last.replace_with_irrelevant(); + // ... and don't need to check tokens that can be + // followed by anything against SUFFIX. + continue 'each_token; + } else { + last.replace_with(token.clone()); + suffix_first = build_suffix_first(); + } + } + TokenTree::Delimited(span, ref d) => { + let my_suffix = TokenSet::singleton(d.close_tt(span.close)); + check_matcher_core(sess, features, attrs, first_sets, &d.tts, &my_suffix); + // don't track non NT tokens + last.replace_with_irrelevant(); + + // also, we don't need to check delimited sequences + // against SUFFIX + continue 'each_token; + } + TokenTree::Sequence(_, ref seq_rep) => { + suffix_first = build_suffix_first(); + // The trick here: when we check the interior, we want + // to include the separator (if any) as a potential + // (but not guaranteed) element of FOLLOW. So in that + // case, we make a temp copy of suffix and stuff + // delimiter in there. + // + // FIXME: Should I first scan suffix_first to see if + // delimiter is already in it before I go through the + // work of cloning it? But then again, this way I may + // get a "tighter" span? 
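+ //
+ // For example (an illustration, not from the original comment): when
+ // checking the interior of `$($e:expr),*`, the separator `,` is added
+ // to the FOLLOW set used for `$e:expr`, since after each repetition
+ // the next token may be the comma rather than whatever follows the
+ // sequence as a whole.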
+ let mut new;
+ let my_suffix = if let Some(sep) = &seq_rep.separator {
+ new = suffix_first.clone();
+ new.add_one_maybe(TokenTree::Token(sep.clone()));
+ &new
+ } else {
+ &suffix_first
+ };
+
+ // At this point, `suffix_first` is built, and
+ // `my_suffix` is some TokenSet that we can use
+ // for checking the interior of `seq_rep`.
+ let next =
+ check_matcher_core(sess, features, attrs, first_sets, &seq_rep.tts, my_suffix);
+ if next.maybe_empty {
+ last.add_all(&next);
+ } else {
+ last = next;
+ }
+
+ // the recursive call to check_matcher_core already ran the 'each_last
+ // check below, so we can just keep going forward here.
+ continue 'each_token;
+ }
+ }
+
+ // (`suffix_first` guaranteed initialized once reaching here.)
+
+ // Now `last` holds the complete set of NT tokens that could
+ // end the sequence before SUFFIX. Check that every one works with `suffix`.
+ 'each_last: for token in &last.tokens {
+ if let TokenTree::MetaVarDecl(_, name, frag_spec) = *token {
+ for next_token in &suffix_first.tokens {
+ match is_in_follow(next_token, frag_spec.name) {
+ IsInFollow::Invalid(msg, help) => {
+ sess.span_diagnostic
+ .struct_span_err(next_token.span(), &msg)
+ .help(help)
+ .emit();
+ // don't bother reporting every source of
+ // conflict for a particular element of `last`.
+ continue 'each_last;
+ }
+ IsInFollow::Yes => {}
+ IsInFollow::No(possible) => {
+ let may_be = if last.tokens.len() == 1 && suffix_first.tokens.len() == 1
+ {
+ "is"
+ } else {
+ "may be"
+ };
+
+ let sp = next_token.span();
+ let mut err = sess.span_diagnostic.struct_span_err(
+ sp,
+ &format!(
+ "`${name}:{frag}` {may_be} followed by `{next}`, which \
+ is not allowed for `{frag}` fragments",
+ name = name,
+ frag = frag_spec,
+ next = quoted_tt_to_string(next_token),
+ may_be = may_be
+ ),
+ );
+ err.span_label(
+ sp,
+ format!("not allowed after `{}` fragments", frag_spec),
+ );
+ let msg = "allowed there are: ";
+ match possible {
+ &[] => {}
+ &[t] => {
+ err.note(&format!(
+ "only {} is allowed after `{}` fragments",
+ t, frag_spec,
+ ));
+ }
+ ts => {
+ err.note(&format!(
+ "{}{} or {}",
+ msg,
+ ts[..ts.len() - 1]
+ .iter()
+ .map(|s| *s)
+ .collect::<Vec<_>>()
+ .join(", "),
+ ts[ts.len() - 1],
+ ));
+ }
+ }
+ err.emit();
+ }
+ }
+ }
+ }
+ }
+ }
+ last
+}
+
+fn token_can_be_followed_by_any(tok: &quoted::TokenTree) -> bool {
+ if let quoted::TokenTree::MetaVarDecl(_, _, frag_spec) = *tok {
+ frag_can_be_followed_by_any(frag_spec.name)
+ } else {
+ // (Non NT's can always be followed by anything in matchers.)
+ true
+ }
+}
+
+/// Returns `true` if a fragment of type `frag` can be followed by any sort of
+/// token. We use this (among other things) as a useful approximation
+/// for when `frag` can be followed by a repetition like `$(...)*` or
+/// `$(...)+`. In general, these can be a bit tricky to reason about,
+/// so we adopt a conservative position that says that any fragment
+/// specifier which consumes at most one token tree can be followed by
+/// a fragment specifier (indeed, these fragments can be followed by
+/// ANYTHING without fear of future compatibility hazards).
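+///
+/// For example (an illustration, not from the original docs), a matcher such as
+///
+/// ```
+/// macro_rules! m { ($i:ident $($rest:tt)*) => {}; }
+/// ```
+///
+/// is accepted because `ident` consumes exactly one token tree, whereas an
+/// open-ended fragment like `expr` is restricted to its FOLLOW set.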
+fn frag_can_be_followed_by_any(frag: Symbol) -> bool {
+ match frag {
+ sym::item | // always terminated by `}` or `;`
+ sym::block | // exactly one token tree
+ sym::ident | // exactly one token tree
+ sym::literal | // exactly one token tree
+ sym::meta | // exactly one token tree
+ sym::lifetime | // exactly one token tree
+ sym::tt => // exactly one token tree
+ true,
+
+ _ =>
+ false,
+ }
+}
+
+enum IsInFollow {
+ Yes,
+ No(&'static [&'static str]),
+ Invalid(String, &'static str),
+}
+
+/// Returns `true` if `frag` can legally be followed by the token `tok`. For
+/// fragments that can consume an unbounded number of tokens, `tok`
+/// must be within a well-defined follow set. This is intended to
+/// guarantee future compatibility: for example, without this rule, if
+/// we expanded `expr` to include a new binary operator, we might
+/// break macros that were relying on that binary operator as a
+/// separator.
+// when changing this do not forget to update doc/book/macros.md!
+fn is_in_follow(tok: &quoted::TokenTree, frag: Symbol) -> IsInFollow {
+ use quoted::TokenTree;
+
+ if let TokenTree::Token(Token { kind: token::CloseDelim(_), .. }) = *tok {
+ // closing a token tree can never be matched by any fragment;
+ // iow, we always require that `(` and `)` match, etc.
+ IsInFollow::Yes
+ } else {
+ match frag {
+ sym::item => {
+ // since items *must* be followed by either a `;` or a `}`, we can
+ // accept anything after them
+ IsInFollow::Yes
+ }
+ sym::block => {
+ // anything can follow block, the braces provide an easy boundary to
+ // maintain
+ IsInFollow::Yes
+ }
+ sym::stmt | sym::expr => {
+ const TOKENS: &[&str] = &["`=>`", "`,`", "`;`"];
+ match tok {
+ TokenTree::Token(token) => match token.kind {
+ FatArrow | Comma | Semi => IsInFollow::Yes,
+ _ => IsInFollow::No(TOKENS),
+ },
+ _ => IsInFollow::No(TOKENS),
+ }
+ }
+ sym::pat => {
+ const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`|`", "`if`", "`in`"];
+ match tok {
+ TokenTree::Token(token) => match token.kind {
+ FatArrow | Comma | Eq | BinOp(token::Or) => IsInFollow::Yes,
+ Ident(name, false) if name == kw::If || name == kw::In => IsInFollow::Yes,
+ _ => IsInFollow::No(TOKENS),
+ },
+ _ => IsInFollow::No(TOKENS),
+ }
+ }
+ sym::path | sym::ty => {
+ const TOKENS: &[&str] = &[
+ "`{`", "`[`", "`=>`", "`,`", "`>`", "`=`", "`:`", "`;`", "`|`", "`as`",
+ "`where`",
+ ];
+ match tok {
+ TokenTree::Token(token) => match token.kind {
+ OpenDelim(token::DelimToken::Brace)
+ | OpenDelim(token::DelimToken::Bracket)
+ | Comma
+ | FatArrow
+ | Colon
+ | Eq
+ | Gt
+ | BinOp(token::Shr)
+ | Semi
+ | BinOp(token::Or) => IsInFollow::Yes,
+ Ident(name, false) if name == kw::As || name == kw::Where => {
+ IsInFollow::Yes
+ }
+ _ => IsInFollow::No(TOKENS),
+ },
+ TokenTree::MetaVarDecl(_, _, frag) if frag.name == sym::block => {
+ IsInFollow::Yes
+ }
+ _ => IsInFollow::No(TOKENS),
+ }
+ }
+ sym::ident | sym::lifetime => {
+ // being a single token, idents and lifetimes are harmless
+ IsInFollow::Yes
+ }
+ sym::literal => {
+ // literals may be of a single token, or two tokens (negative numbers)
+ IsInFollow::Yes
+ }
+ sym::meta | sym::tt => {
+ // being either a single token or a delimited sequence, tt is
+ // harmless
+ IsInFollow::Yes
+ }
+ sym::vis => {
+ // Explicitly disallow `priv`, on the off chance it comes back.
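+ // For example (an illustration only): `$v:vis struct S;` is accepted,
+ // since `struct` is a non-raw identifier other than `priv`, while
+ // `$v:vis priv` is not.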
+ const TOKENS: &[&str] = &["`,`", "an ident", "a type"];
+ match tok {
+ TokenTree::Token(token) => match token.kind {
+ Comma => IsInFollow::Yes,
+ Ident(name, is_raw) if is_raw || name != kw::Priv => IsInFollow::Yes,
+ _ => {
+ if token.can_begin_type() {
+ IsInFollow::Yes
+ } else {
+ IsInFollow::No(TOKENS)
+ }
+ }
+ },
+ TokenTree::MetaVarDecl(_, _, frag)
+ if frag.name == sym::ident
+ || frag.name == sym::ty
+ || frag.name == sym::path =>
+ {
+ IsInFollow::Yes
+ }
+ _ => IsInFollow::No(TOKENS),
+ }
+ }
+ kw::Invalid => IsInFollow::Yes,
+ _ => IsInFollow::Invalid(
+ format!("invalid fragment specifier `{}`", frag),
+ VALID_FRAGMENT_NAMES_MSG,
+ ),
+ }
+ }
+}
+
+fn has_legal_fragment_specifier(
+ sess: &ParseSess,
+ features: &Features,
+ attrs: &[ast::Attribute],
+ tok: &quoted::TokenTree,
+) -> Result<(), String> {
+ debug!("has_legal_fragment_specifier({:?})", tok);
+ if let quoted::TokenTree::MetaVarDecl(_, _, ref frag_spec) = *tok {
+ let frag_span = tok.span();
+ if !is_legal_fragment_specifier(sess, features, attrs, frag_spec.name, frag_span) {
+ return Err(frag_spec.to_string());
+ }
+ }
+ Ok(())
+}
+
+fn is_legal_fragment_specifier(
+ _sess: &ParseSess,
+ _features: &Features,
+ _attrs: &[ast::Attribute],
+ frag_name: Symbol,
+ _frag_span: Span,
+) -> bool {
+ /*
+ * If new fragment specifiers are invented in nightly, `_sess`,
+ * `_features`, `_attrs`, and `_frag_span` will be useful here
+ * for checking against feature gates. See past versions of
+ * this function.
+ */
+ match frag_name {
+ sym::item
+ | sym::block
+ | sym::stmt
+ | sym::expr
+ | sym::pat
+ | sym::lifetime
+ | sym::path
+ | sym::ty
+ | sym::ident
+ | sym::meta
+ | sym::tt
+ | sym::vis
+ | sym::literal
+ | kw::Invalid => true,
+ _ => false,
+ }
+}
+
+fn quoted_tt_to_string(tt: &quoted::TokenTree) -> String {
+ match *tt {
+ quoted::TokenTree::Token(ref token) => crate::print::pprust::token_to_string(&token),
+ quoted::TokenTree::MetaVar(_, name) => format!("${}", name),
+ quoted::TokenTree::MetaVarDecl(_, name, kind) => format!("${}:{}", name, kind),
+ _ => panic!(
+ "unexpected quoted::TokenTree::{{Sequence or Delimited}} \
+ in follow set checker"
+ ),
+ }
+}
diff --git a/src/libsyntax/ext/mbe/quoted.rs b/src/libsyntax/ext/mbe/quoted.rs
new file mode 100644
index 00000000000..d161e6638bf
--- /dev/null
+++ b/src/libsyntax/ext/mbe/quoted.rs
@@ -0,0 +1,433 @@
+use crate::ast;
+use crate::ast::NodeId;
+use crate::ext::tt::macro_parser;
+use crate::feature_gate::Features;
+use crate::parse::token::{self, Token, TokenKind};
+use crate::parse::ParseSess;
+use crate::print::pprust;
+use crate::symbol::kw;
+use crate::tokenstream::{self, DelimSpan};
+
+use syntax_pos::{edition::Edition, BytePos, Span};
+
+use rustc_data_structures::sync::Lrc;
+use std::iter::Peekable;
+
+/// Contains the sub-token-trees of a "delimited" token tree, such as the contents of `(`. Note
+/// that the delimiter itself might be `NoDelim`.
+#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug)]
+crate struct Delimited {
+ crate delim: token::DelimToken,
+ crate tts: Vec<TokenTree>,
+}
+
+impl Delimited {
+ /// Returns a `self::TokenTree` with a `Span` corresponding to the opening delimiter.
+ crate fn open_tt(&self, span: Span) -> TokenTree {
+ let open_span = if span.is_dummy() {
+ span
+ } else {
+ span.with_hi(span.lo() + BytePos(self.delim.len() as u32))
+ };
+ TokenTree::token(token::OpenDelim(self.delim), open_span)
+ }
+
+ /// Returns a `self::TokenTree` with a `Span` corresponding to the closing delimiter.
+ crate fn close_tt(&self, span: Span) -> TokenTree {
+ let close_span = if span.is_dummy() {
+ span
+ } else {
+ span.with_lo(span.hi() - BytePos(self.delim.len() as u32))
+ };
+ TokenTree::token(token::CloseDelim(self.delim), close_span)
+ }
+}
+
+#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug)]
+crate struct SequenceRepetition {
+ /// The sequence of token trees
+ crate tts: Vec<TokenTree>,
+ /// The optional separator
+ crate separator: Option<Token>,
+ /// Whether the sequence can be repeated zero (*), or one or more times (+)
+ crate kleene: KleeneToken,
+ /// The number of `Match`s that appear in the sequence (and subsequences)
+ crate num_captures: usize,
+}
+
+#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug, Copy)]
+crate struct KleeneToken {
+ crate span: Span,
+ crate op: KleeneOp,
+}
+
+impl KleeneToken {
+ crate fn new(op: KleeneOp, span: Span) -> KleeneToken {
+ KleeneToken { span, op }
+ }
+}
+
+/// A Kleene-style [repetition operator](http://en.wikipedia.org/wiki/Kleene_star)
+/// for token sequences.
+#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
+crate enum KleeneOp {
+ /// Kleene star (`*`) for zero or more repetitions
+ ZeroOrMore,
+ /// Kleene plus (`+`) for one or more repetitions
+ OneOrMore,
+ /// Kleene optional (`?`) for zero or one repetitions
+ ZeroOrOne,
+}
+
+/// Similar to `tokenstream::TokenTree`, except that `$i`, `$i:ident`, and `$(...)`
+/// are "first-class" token trees. Useful for parsing macros.
+#[derive(Debug, Clone, PartialEq, RustcEncodable, RustcDecodable)]
+crate enum TokenTree {
+ Token(Token),
+ Delimited(DelimSpan, Lrc<Delimited>),
+ /// A kleene-style repetition sequence
+ Sequence(DelimSpan, Lrc<SequenceRepetition>),
+ /// e.g., `$var`
+ MetaVar(Span, ast::Ident),
+ /// e.g., `$var:expr`. This is only used in the left hand side of MBE macros.
+ MetaVarDecl(
+ Span,
+ ast::Ident, /* name to bind */
+ ast::Ident, /* kind of nonterminal */
+ ),
+}
+
+impl TokenTree {
+ /// Return the number of tokens in the tree.
+ crate fn len(&self) -> usize {
+ match *self {
+ TokenTree::Delimited(_, ref delimed) => match delimed.delim {
+ token::NoDelim => delimed.tts.len(),
+ _ => delimed.tts.len() + 2,
+ },
+ TokenTree::Sequence(_, ref seq) => seq.tts.len(),
+ _ => 0,
+ }
+ }
+
+ /// Returns `true` if the given token tree is delimited.
+ crate fn is_delimited(&self) -> bool {
+ match *self {
+ TokenTree::Delimited(..) => true,
+ _ => false,
+ }
+ }
+
+ /// Returns `true` if the given token tree is a token of the given kind.
+ crate fn is_token(&self, expected_kind: &TokenKind) -> bool {
+ match self {
+ TokenTree::Token(Token { kind: actual_kind, .. }) => actual_kind == expected_kind,
+ _ => false,
+ }
+ }
+
+ /// Gets the `index`-th sub-token-tree. This only makes sense for delimited trees and sequences.
+ crate fn get_tt(&self, index: usize) -> TokenTree {
+ match (self, index) {
+ (&TokenTree::Delimited(_, ref delimed), _) if delimed.delim == token::NoDelim => {
+ delimed.tts[index].clone()
+ }
+ (&TokenTree::Delimited(span, ref delimed), _) => {
+ if index == 0 {
+ return delimed.open_tt(span.open);
+ }
+ if index == delimed.tts.len() + 1 {
+ return delimed.close_tt(span.close);
+ }
+ delimed.tts[index - 1].clone()
+ }
+ (&TokenTree::Sequence(_, ref seq), _) => seq.tts[index].clone(),
+ _ => panic!("Cannot expand a token tree"),
+ }
+ }
+
+ /// Retrieves the `TokenTree`'s span.
+ crate fn span(&self) -> Span {
+ match *self {
+ TokenTree::Token(Token { span, ..
})
+ | TokenTree::MetaVar(span, _)
+ | TokenTree::MetaVarDecl(span, _, _) => span,
+ TokenTree::Delimited(span, _) | TokenTree::Sequence(span, _) => span.entire(),
+ }
+ }
+
+ crate fn token(kind: TokenKind, span: Span) -> TokenTree {
+ TokenTree::Token(Token::new(kind, span))
+ }
+}
+
+/// Takes a `tokenstream::TokenStream` and returns a `Vec<self::TokenTree>`. Specifically, this
+/// takes a generic `TokenStream`, such as is used in the rest of the compiler, and returns a
+/// collection of `TokenTree` for use in parsing a macro.
+///
+/// # Parameters
+///
+/// - `input`: a token stream to read from, the contents of which we are parsing.
+/// - `expect_matchers`: `parse` can be used to parse either the "patterns" or the "body" of a
+/// macro. Both take roughly the same form _except_ that in a pattern, metavars are declared with
+/// their "matcher" type. For example `$var:expr` or `$id:ident`. In this example, `expr` and
+/// `ident` are "matchers". They are not present in the body of a macro rule -- just in the
+/// pattern, so we pass a parameter to indicate whether to expect them or not.
+/// - `sess`: the parsing session. Any errors will be emitted to this session.
+/// - `features`, `attrs`: language feature flags and attributes so that we know whether to use
+/// unstable features or not.
+/// - `edition`: which edition are we in.
+/// - `macro_node_id`: the NodeId of the macro we are parsing.
+///
+/// # Returns
+///
+/// A collection of `self::TokenTree`. There may also be some errors emitted to `sess`.
+crate fn parse(
+ input: tokenstream::TokenStream,
+ expect_matchers: bool,
+ sess: &ParseSess,
+ features: &Features,
+ attrs: &[ast::Attribute],
+ edition: Edition,
+ macro_node_id: NodeId,
+) -> Vec<TokenTree> {
+ // Will contain the final collection of `self::TokenTree`
+ let mut result = Vec::new();
+
+ // For each token tree in `input`, parse the token into a `self::TokenTree`, consuming
+ // additional trees if need be.
+ let mut trees = input.trees().peekable();
+ while let Some(tree) = trees.next() {
+ // Given the parsed tree, if there is a metavar and we are expecting matchers, actually
+ // parse out the matcher (i.e., in `$id:ident` this would parse the `:` and `ident`).
+ let tree = parse_tree(
+ tree,
+ &mut trees,
+ expect_matchers,
+ sess,
+ features,
+ attrs,
+ edition,
+ macro_node_id,
+ );
+ match tree {
+ TokenTree::MetaVar(start_sp, ident) if expect_matchers => {
+ let span = match trees.next() {
+ Some(tokenstream::TokenTree::Token(Token { kind: token::Colon, span })) => {
+ match trees.next() {
+ Some(tokenstream::TokenTree::Token(token)) => match token.ident() {
+ Some((kind, _)) => {
+ let span = token.span.with_lo(start_sp.lo());
+ result.push(TokenTree::MetaVarDecl(span, ident, kind));
+ continue;
+ }
+ _ => token.span,
+ },
+ tree => tree.as_ref().map(tokenstream::TokenTree::span).unwrap_or(span),
+ }
+ }
+ tree => tree.as_ref().map(tokenstream::TokenTree::span).unwrap_or(start_sp),
+ };
+ sess.missing_fragment_specifiers.borrow_mut().insert(span);
+ result.push(TokenTree::MetaVarDecl(span, ident, ast::Ident::invalid()));
+ }
+
+ // Not a metavar or no matchers allowed, so just return the tree
+ _ => result.push(tree),
+ }
+ }
+ result
+}
+
+/// Takes a `tokenstream::TokenTree` and returns a `self::TokenTree`. Specifically, this takes a
+/// generic `TokenTree`, such as is used in the rest of the compiler, and returns a `TokenTree`
+/// for use in parsing a macro.
+///
+/// Converting the given tree may involve reading more tokens.
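+///
+/// For example (an illustration, not from the original docs): when converting
+/// the `$` in `$( $i:ident ),*`, this reads the whole delimited group, the
+/// separator, and the Kleene operator that follow it from `trees`, producing a
+/// single `TokenTree::Sequence`.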
+///
+/// # Parameters
+///
+/// - `tree`: the tree we wish to convert.
+/// - `trees`: an iterator over trees. We may need to read more tokens from it in order to finish
+/// converting `tree`
+/// - `expect_matchers`: same as for `parse` (see above).
+/// - `sess`: the parsing session. Any errors will be emitted to this session.
+/// - `features`, `attrs`: language feature flags and attributes so that we know whether to use
+/// unstable features or not.
+fn parse_tree(
+ tree: tokenstream::TokenTree,
+ trees: &mut Peekable<impl Iterator<Item = tokenstream::TokenTree>>,
+ expect_matchers: bool,
+ sess: &ParseSess,
+ features: &Features,
+ attrs: &[ast::Attribute],
+ edition: Edition,
+ macro_node_id: NodeId,
+) -> TokenTree {
+ // Depending on what `tree` is, we could be parsing different parts of a macro
+ match tree {
+ // `tree` is a `$` token. Look at the next token in `trees`
+ tokenstream::TokenTree::Token(Token { kind: token::Dollar, span }) => match trees.next() {
+ // `tree` is followed by a delimited set of token trees. This indicates the beginning
+ // of a repetition sequence in the macro (e.g. `$(pat)*`).
+ Some(tokenstream::TokenTree::Delimited(span, delim, tts)) => {
+ // Must have `(` not `{` or `[`
+ if delim != token::Paren {
+ let tok = pprust::token_kind_to_string(&token::OpenDelim(delim));
+ let msg = format!("expected `(`, found `{}`", tok);
+ sess.span_diagnostic.span_err(span.entire(), &msg);
+ }
+ // Parse the contents of the sequence itself
+ let sequence = parse(
+ tts.into(),
+ expect_matchers,
+ sess,
+ features,
+ attrs,
+ edition,
+ macro_node_id,
+ );
+ // Get the Kleene operator and optional separator
+ let (separator, kleene) = parse_sep_and_kleene_op(trees, span.entire(), sess);
+ // Count the number of captured "names" (i.e., named metavars)
+ let name_captures = macro_parser::count_names(&sequence);
+ TokenTree::Sequence(
+ span,
+ Lrc::new(SequenceRepetition {
+ tts: sequence,
+ separator,
+ kleene,
+ num_captures: name_captures,
+ }),
+ )
+ }
+
+ // `tree` is followed by an `ident`. This could be `$meta_var` or the `$crate` special
+ // metavariable that names the crate of the invocation.
+ Some(tokenstream::TokenTree::Token(token)) if token.is_ident() => {
+ let (ident, is_raw) = token.ident().unwrap();
+ let span = ident.span.with_lo(span.lo());
+ if ident.name == kw::Crate && !is_raw {
+ TokenTree::token(token::Ident(kw::DollarCrate, is_raw), span)
+ } else {
+ TokenTree::MetaVar(span, ident)
+ }
+ }
+
+ // `tree` is followed by a random token. This is an error.
+ Some(tokenstream::TokenTree::Token(token)) => {
+ let msg =
+ format!("expected identifier, found `{}`", pprust::token_to_string(&token),);
+ sess.span_diagnostic.span_err(token.span, &msg);
+ TokenTree::MetaVar(token.span, ast::Ident::invalid())
+ }
+
+ // There are no more tokens. Just return the `$` we already have.
+ None => TokenTree::token(token::Dollar, span),
+ },
+
+ // `tree` is an arbitrary token. Keep it.
+ tokenstream::TokenTree::Token(token) => TokenTree::Token(token),
+
+ // `tree` is the beginning of a delimited set of tokens (e.g., `(` or `{`). We need to
+ // descend into the delimited set and further parse it.
+ tokenstream::TokenTree::Delimited(span, delim, tts) => TokenTree::Delimited(
+ span,
+ Lrc::new(Delimited {
+ delim,
+ tts: parse(
+ tts.into(),
+ expect_matchers,
+ sess,
+ features,
+ attrs,
+ edition,
+ macro_node_id,
+ ),
+ }),
+ ),
+ }
+}
+
+/// Takes a token and returns `Some(KleeneOp)` if the token is `+` `*` or `?`. Otherwise, return
+/// `None`.
+fn kleene_op(token: &Token) -> Option<KleeneOp> {
+ match token.kind {
+ token::BinOp(token::Star) => Some(KleeneOp::ZeroOrMore),
+ token::BinOp(token::Plus) => Some(KleeneOp::OneOrMore),
+ token::Question => Some(KleeneOp::ZeroOrOne),
+ _ => None,
+ }
+}
+
+/// Parse the next token tree of the input looking for a KleeneOp. Returns
+///
+/// - Ok(Ok((op, span))) if the next token tree is a KleeneOp
+/// - Ok(Err(tok, span)) if the next token tree is a token but not a KleeneOp
+/// - Err(span) if the next token tree is not a token
+fn parse_kleene_op(
+ input: &mut impl Iterator<Item = tokenstream::TokenTree>,
+ span: Span,
+) -> Result<Result<(KleeneOp, Span), Token>, Span> {
+ match input.next() {
+ Some(tokenstream::TokenTree::Token(token)) => match kleene_op(&token) {
+ Some(op) => Ok(Ok((op, token.span))),
+ None => Ok(Err(token)),
+ },
+ tree => Err(tree.as_ref().map(tokenstream::TokenTree::span).unwrap_or(span)),
+ }
+}
+
+/// Attempt to parse a single Kleene star, possibly with a separator.
+///
+/// For example, in a pattern such as `$(a),*`, `a` is the pattern to be repeated, `,` is the
+/// separator, and `*` is the Kleene operator. This function is specifically concerned with parsing
+/// the last two tokens of such a pattern: namely, the optional separator and the Kleene operator
+/// itself. Note that here we are parsing the _macro_ itself, rather than trying to match some
+/// stream of tokens in an invocation of a macro.
+///
+/// This function will take some input iterator `input` corresponding to `span` and a parsing
+/// session `sess`. If the next one (or possibly two) tokens in `input` correspond to a Kleene
+/// operator and separator, then a tuple with `(separator, KleeneOp)` is returned. Otherwise, an
+/// error with the appropriate span is emitted to `sess` and a dummy value is returned.
+fn parse_sep_and_kleene_op(
+ input: &mut Peekable<impl Iterator<Item = tokenstream::TokenTree>>,
+ span: Span,
+ sess: &ParseSess,
+) -> (Option<Token>, KleeneToken) {
+ // We basically look at two token trees here, denoted as #1 and #2 below
+ let span = match parse_kleene_op(input, span) {
+ // #1 is a `?`, `+`, or `*` KleeneOp
+ Ok(Ok((op, span))) => return (None, KleeneToken::new(op, span)),
+
+ // #1 is a separator followed by #2, a KleeneOp
+ Ok(Err(token)) => match parse_kleene_op(input, token.span) {
+ // #2 is the `?` Kleene op, which does not take a separator (error)
+ Ok(Ok((KleeneOp::ZeroOrOne, span))) => {
+ // Error!
+ sess.span_diagnostic.span_err(
+ token.span,
+ "the `?` macro repetition operator does not take a separator",
+ );
+
+ // Return a dummy
+ return (None, KleeneToken::new(KleeneOp::ZeroOrMore, span));
+ }
+
+ // #2 is a KleeneOp :D
+ Ok(Ok((op, span))) => return (Some(token), KleeneToken::new(op, span)),
+
+ // #2 is a random token or not a token at all :(
+ Ok(Err(Token { span, ..
})) | Err(span) => span,
+ },
+
+ // #1 is not a token
+ Err(span) => span,
+ };
+
+ // If we ever get to this point, we have experienced an "unexpected token" error
+ sess.span_diagnostic.span_err(span, "expected one of: `*`, `+`, or `?`");
+
+ // Return a dummy
+ (None, KleeneToken::new(KleeneOp::ZeroOrMore, span))
+}
diff --git a/src/libsyntax/ext/mbe/transcribe.rs b/src/libsyntax/ext/mbe/transcribe.rs
new file mode 100644
index 00000000000..f9c07e3a2e4
--- /dev/null
+++ b/src/libsyntax/ext/mbe/transcribe.rs
@@ -0,0 +1,398 @@
+use crate::ast::{Ident, Mac};
+use crate::ext::base::ExtCtxt;
+use crate::ext::tt::macro_parser::{MatchedNonterminal, MatchedSeq, NamedMatch};
+use crate::ext::tt::quoted;
+use crate::mut_visit::{self, MutVisitor};
+use crate::parse::token::{self, NtTT, Token};
+use crate::tokenstream::{DelimSpan, TokenStream, TokenTree, TreeAndJoint};
+
+use smallvec::{smallvec, SmallVec};
+
+use errors::pluralise;
+use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::sync::Lrc;
+use syntax_pos::hygiene::{ExpnId, Transparency};
+use syntax_pos::Span;
+
+use std::mem;
+
+// A Marker adds the given mark to the syntax context.
+struct Marker(ExpnId, Transparency);
+
+impl MutVisitor for Marker {
+ fn visit_span(&mut self, span: &mut Span) {
+ *span = span.apply_mark(self.0, self.1)
+ }
+
+ fn visit_mac(&mut self, mac: &mut Mac) {
+ mut_visit::noop_visit_mac(mac, self)
+ }
+}
+
+impl Marker {
+ fn visit_delim_span(&mut self, dspan: &mut DelimSpan) {
+ self.visit_span(&mut dspan.open);
+ self.visit_span(&mut dspan.close);
+ }
+}
+
+/// An iterator over the token trees in a delimited token tree (`{ ... }`) or a sequence (`$(...)`).
+enum Frame {
+ Delimited { forest: Lrc<quoted::Delimited>, idx: usize, span: DelimSpan },
+ Sequence { forest: Lrc<quoted::SequenceRepetition>, idx: usize, sep: Option<Token> },
+}
+
+impl Frame {
+ /// Construct a new frame around the delimited set of tokens.
+ fn new(tts: Vec<quoted::TokenTree>) -> Frame {
+ let forest = Lrc::new(quoted::Delimited { delim: token::NoDelim, tts });
+ Frame::Delimited { forest, idx: 0, span: DelimSpan::dummy() }
+ }
+}
+
+impl Iterator for Frame {
+ type Item = quoted::TokenTree;
+
+ fn next(&mut self) -> Option<quoted::TokenTree> {
+ match *self {
+ Frame::Delimited { ref forest, ref mut idx, .. } => {
+ *idx += 1;
+ forest.tts.get(*idx - 1).cloned()
+ }
+ Frame::Sequence { ref forest, ref mut idx, .. } => {
+ *idx += 1;
+ forest.tts.get(*idx - 1).cloned()
+ }
+ }
+ }
+}
+
+/// This can do Macro-By-Example transcription.
+/// - `interp` is a map of meta-variables to the tokens (non-terminals) they matched in the
+/// invocation. We are assuming we already know there is a match.
+/// - `src` is the RHS of the MBE, that is, the "example" we are filling in.
+///
+/// For example,
+///
+/// ```rust
+/// macro_rules! foo {
+/// ($id:ident) => { println!("{}", stringify!($id)); }
+/// }
+///
+/// foo!(bar);
+/// ```
+///
+/// `interp` would contain `$id => bar` and `src` would contain `println!("{}", stringify!($id));`.
+///
+/// `transcribe` would return a `TokenStream` containing `println!("{}", stringify!(bar));`.
+///
+/// Along the way, we do some additional error checking.
+pub(super) fn transcribe(
+ cx: &ExtCtxt<'_>,
+ interp: &FxHashMap<Ident, NamedMatch>,
+ src: Vec<quoted::TokenTree>,
+ transparency: Transparency,
+) -> TokenStream {
+ // Nothing for us to transcribe...
+ if src.is_empty() {
+ return TokenStream::empty();
+ }
+
+ // We descend into the RHS (`src`), expanding things as we go. This stack contains the things
+ // we have yet to expand/are still expanding. We start the stack off with the whole RHS.
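+ // For example (an illustration, not from the original comment): transcribing
+ // `$($x),*` where `$x` matched `a` and `b` pushes a single sequence frame that
+ // is walked twice (with `repeats` going from (0, 2) to (1, 2)), emitting the
+ // separator between iterations: `a , b`.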
+ let mut stack: SmallVec<[Frame; 1]> = smallvec![Frame::new(src)];
+
+ // As we descend in the RHS, we will need to be able to match nested sequences of matchers.
+ // `repeats` keeps track of where we are in matching at each level, with the last element being
+ // the most deeply nested sequence. This is used as a stack.
+ let mut repeats = Vec::new();
+
+ // `result` contains resulting token stream from the TokenTree we just finished processing. At
+ // the end, this will contain the full result of transcription, but at arbitrary points during
+ // `transcribe`, `result` will contain subsets of the final result.
+ //
+ // Specifically, as we descend into each TokenTree, we will push the existing results onto the
+ // `result_stack` and clear `results`. We will then produce the results of transcribing the
+ // TokenTree into `results`. Then, as we unwind back out of the `TokenTree`, we will pop the
+ // `result_stack` and append `results` to it to produce the new `results` up to that point.
+ //
+ // Thus, if we try to pop the `result_stack` and it is empty, we have reached the top-level
+ // again, and we are done transcribing.
+ let mut result: Vec<TreeAndJoint> = Vec::new();
+ let mut result_stack = Vec::new();
+ let mut marker = Marker(cx.current_expansion.id, transparency);
+
+ loop {
+ // Look at the last frame on the stack.
+ let tree = if let Some(tree) = stack.last_mut().unwrap().next() {
+ // If it still has a TokenTree we have not looked at yet, use that tree.
+ tree
+ }
+ // The else-case never produces a value for `tree` (it `continue`s or `return`s).
+ else {
+ // Otherwise, if we have just reached the end of a sequence and we can keep repeating,
+ // go back to the beginning of the sequence.
+ if let Frame::Sequence { idx, sep, .. } = stack.last_mut().unwrap() {
+ let (repeat_idx, repeat_len) = repeats.last_mut().unwrap();
+ *repeat_idx += 1;
+ if repeat_idx < repeat_len {
+ *idx = 0;
+ if let Some(sep) = sep {
+ result.push(TokenTree::Token(sep.clone()).into());
+ }
+ continue;
+ }
+ }
+
+ // We are done with the top of the stack. Pop it. Depending on what it was, we do
+ // different things. Note that the outermost item must be the delimited, wrapped RHS
+ // that was passed in originally to `transcribe`.
+ match stack.pop().unwrap() {
+ // Done with a sequence. Pop from repeats.
+ Frame::Sequence { .. } => {
+ repeats.pop();
+ }
+
+ // We are done processing a Delimited. If this is the top-level delimited, we are
+ // done. Otherwise, we unwind the result_stack to append what we have produced to
+ // any previous results.
+ Frame::Delimited { forest, span, .. } => {
+ if result_stack.is_empty() {
+ // No results left to compute! We are back at the top-level.
+ return TokenStream::new(result);
+ }
+
+ // Step back into the parent Delimited.
+ let tree =
+ TokenTree::Delimited(span, forest.delim, TokenStream::new(result).into());
+ result = result_stack.pop().unwrap();
+ result.push(tree.into());
+ }
+ }
+ continue;
+ };
+
+ // At this point, we know we are in the middle of a TokenTree (the last one on `stack`).
+ // `tree` contains the next `TokenTree` to be processed.
+ match tree {
+ // We are descending into a sequence. We first make sure that the matchers in the RHS
+ // and the matches in `interp` have the same shape. Otherwise, either the caller or the
+ // macro writer has made a mistake.
+ seq @ quoted::TokenTree::Sequence(..)
=> {
+ match lockstep_iter_size(&seq, interp, &repeats) {
+ LockstepIterSize::Unconstrained => {
+ cx.span_fatal(
+ seq.span(), /* blame macro writer */
+ "attempted to repeat an expression containing no syntax variables \
+ matched as repeating at this depth",
+ );
+ }
+
+ LockstepIterSize::Contradiction(ref msg) => {
+ // FIXME: this really ought to be caught at macro definition time... It
+ // happens when two meta-variables are used in the same repetition in a
+ // sequence, but they come from different sequence matchers and repeat
+ // different amounts.
+ cx.span_fatal(seq.span(), &msg[..]);
+ }
+
+ LockstepIterSize::Constraint(len, _) => {
+ // We do this to avoid an extra clone above. We know that this is a
+ // sequence already.
+ let (sp, seq) = if let quoted::TokenTree::Sequence(sp, seq) = seq {
+ (sp, seq)
+ } else {
+ unreachable!()
+ };
+
+ // Is the repetition empty?
+ if len == 0 {
+ if seq.kleene.op == quoted::KleeneOp::OneOrMore {
+ // FIXME: this really ought to be caught at macro definition
+ // time... It happens when the Kleene operator in the matcher and
+ // the body for the same meta-variable do not match.
+ cx.span_fatal(sp.entire(), "this must repeat at least once");
+ }
+ } else {
+ // 0 is the initial counter (we have done 0 repetitions so far). `len`
+ // is the total number of repetitions we should generate.
+ repeats.push((0, len));
+
+ // The first time we encounter the sequence we push it to the stack. It
+ // then gets reused (see the beginning of the loop) until we are done
+ // repeating.
+ stack.push(Frame::Sequence {
+ idx: 0,
+ sep: seq.separator.clone(),
+ forest: seq,
+ });
+ }
+ }
+ }
+ }
+
+ // Replace the meta-var with the matched token tree from the invocation.
+ quoted::TokenTree::MetaVar(mut sp, mut ident) => {
+ // Find the matched nonterminal from the macro invocation, and use it to replace
+ // the meta-var.
+ if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
+ if let MatchedNonterminal(ref nt) = cur_matched {
+ // FIXME #2887: why do we apply a mark when matching a token tree meta-var
+ // (e.g. `$x:tt`), but not when we are matching any other type of token
+ // tree?
+ if let NtTT(ref tt) = **nt {
+ result.push(tt.clone().into());
+ } else {
+ marker.visit_span(&mut sp);
+ let token = TokenTree::token(token::Interpolated(nt.clone()), sp);
+ result.push(token.into());
+ }
+ } else {
+ // We were unable to descend far enough. This is an error.
+ cx.span_fatal(
+ sp, /* blame the macro writer */
+ &format!("variable '{}' is still repeating at this depth", ident),
+ );
+ }
+ } else {
+ // If we aren't able to match the meta-var, we push it back into the result but
+ // with modified syntax context. (I believe this supports nested macros).
+ marker.visit_span(&mut sp);
+ marker.visit_ident(&mut ident);
+ result.push(TokenTree::token(token::Dollar, sp).into());
+ result.push(TokenTree::Token(Token::from_ast_ident(ident)).into());
+ }
+ }
+
+ // If we are entering a new delimiter, we push its contents to the `stack` to be
+ // processed, and we push all of the currently produced results to the `result_stack`.
+ // We will produce all of the results of the inside of the `Delimited` and then we will
+ // jump back out of the Delimited, pop the result_stack and add the new results back to
+ // the previous results (from outside the Delimited).
+            quoted::TokenTree::Delimited(mut span, delimited) => {
+                marker.visit_delim_span(&mut span);
+                stack.push(Frame::Delimited { forest: delimited, idx: 0, span });
+                result_stack.push(mem::take(&mut result));
+            }
+
+            // Nothing much to do here. Just push the token to the result, being careful to
+            // preserve syntax context.
+            quoted::TokenTree::Token(token) => {
+                let mut tt = TokenTree::Token(token);
+                marker.visit_tt(&mut tt);
+                result.push(tt.into());
+            }
+
+            // There should be no meta-var declarations in the invocation of a macro.
+            quoted::TokenTree::MetaVarDecl(..) => panic!("unexpected `TokenTree::MetaVarDecl`"),
+        }
+    }
+}
+
+/// Looks up the meta-var named `ident` and returns the matched token tree from the invocation
+/// using the set of matches `interpolations`.
+///
+/// See the definition of `repeats` in the `transcribe` function. `repeats` is used to descend
+/// into the right place in nested matchers. If we attempt to descend too far, the macro writer has
+/// made a mistake, and we return `None`.
+fn lookup_cur_matched<'a>(
+    ident: Ident,
+    interpolations: &'a FxHashMap<Ident, NamedMatch>,
+    repeats: &[(usize, usize)],
+) -> Option<&'a NamedMatch> {
+    interpolations.get(&ident).map(|matched| {
+        let mut matched = matched;
+        for &(idx, _) in repeats {
+            match matched {
+                MatchedNonterminal(_) => break,
+                MatchedSeq(ref ads, _) => matched = ads.get(idx).unwrap(),
+            }
+        }
+
+        matched
+    })
+}
+
+/// An accumulator over a TokenTree to be used with `fold`. During transcription, we need to make
+/// sure that the size of each sequence and all of its nested sequences are the same as the sizes
+/// of all the matched (nested) sequences in the macro invocation. If they don't match, somebody
+/// has made a mistake (either the macro writer or caller).
+#[derive(Clone)]
+enum LockstepIterSize {
+    /// No constraints on length of matcher. This is true for any TokenTree variants except a
+    /// `MetaVar` with an actual `MatchedSeq` (as opposed to a `MatchedNonterminal`).
+    Unconstrained,
+
+    /// A `MetaVar` with an actual `MatchedSeq`. The length of the match and the name of the
+    /// meta-var are returned.
+    Constraint(usize, Ident),
+
+    /// Two `Constraint`s on the same sequence had different lengths. This is an error.
+    Contradiction(String),
+}
+
+impl LockstepIterSize {
+    /// Find incompatibilities in matcher/invocation sizes.
+    /// - `Unconstrained` is compatible with everything.
+    /// - `Contradiction` is incompatible with everything.
+    /// - `Constraint(len)` is only compatible with other constraints of the same length.
+    fn with(self, other: LockstepIterSize) -> LockstepIterSize {
+        match self {
+            LockstepIterSize::Unconstrained => other,
+            LockstepIterSize::Contradiction(_) => self,
+            LockstepIterSize::Constraint(l_len, ref l_id) => match other {
+                LockstepIterSize::Unconstrained => self,
+                LockstepIterSize::Contradiction(_) => other,
+                LockstepIterSize::Constraint(r_len, _) if l_len == r_len => self,
+                LockstepIterSize::Constraint(r_len, r_id) => {
+                    let msg = format!(
+                        "meta-variable `{}` repeats {} time{}, but `{}` repeats {} time{}",
+                        l_id,
+                        l_len,
+                        pluralise!(l_len),
+                        r_id,
+                        r_len,
+                        pluralise!(r_len),
+                    );
+                    LockstepIterSize::Contradiction(msg)
+                }
+            },
+        }
+    }
+}
+
+/// Given a `tree`, make sure that all sequences have the same length as the matches for the
+/// appropriate meta-vars in `interpolations`.
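The `with` method just defined behaves like a join on a small lattice: agreeing constraints survive, and disagreeing ones poison the result. A detached sketch of the same rules, using an invented `Size` type that carries only a length (not the compiler's `Ident`-carrying variant):

```rust
// Invented miniature of `LockstepIterSize::with`; not the compiler's type.
#[derive(Debug, Clone, PartialEq)]
enum Size {
    Unconstrained,
    Constraint(usize),
    Contradiction(String),
}

impl Size {
    fn with(self, other: Size) -> Size {
        match (self, other) {
            // Unconstrained is the identity element.
            (Size::Unconstrained, o) => o,
            // A contradiction absorbs everything.
            (s @ Size::Contradiction(_), _) => s,
            (s, Size::Unconstrained) => s,
            (_, c @ Size::Contradiction(_)) => c,
            // Equal constraints are compatible; unequal ones are an error.
            (Size::Constraint(l), Size::Constraint(r)) if l == r => Size::Constraint(l),
            (Size::Constraint(l), Size::Constraint(r)) => {
                Size::Contradiction(format!("{} vs {} repetitions", l, r))
            }
        }
    }
}

fn main() {
    // Two meta-variables repeating 3 times each: compatible.
    let ok = Size::Unconstrained
        .with(Size::Constraint(3))
        .with(Size::Constraint(3));
    assert_eq!(ok, Size::Constraint(3));

    // A 3-repetition and a 2-repetition in one sequence: an error.
    let bad = Size::Constraint(3).with(Size::Constraint(2));
    assert!(matches!(bad, Size::Contradiction(_)));
    println!("{:?} / {:?}", ok, bad);
}
```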
+///
+/// Note that if `repeats` does not match the exact correct depth of a meta-var,
+/// `lookup_cur_matched` will return `None`, which is why this still works even in the presence of
+/// multiple nested matcher sequences.
+fn lockstep_iter_size(
+    tree: &quoted::TokenTree,
+    interpolations: &FxHashMap<Ident, NamedMatch>,
+    repeats: &[(usize, usize)],
+) -> LockstepIterSize {
+    use quoted::TokenTree;
+    match *tree {
+        TokenTree::Delimited(_, ref delimed) => {
+            delimed.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
+                size.with(lockstep_iter_size(tt, interpolations, repeats))
+            })
+        }
+        TokenTree::Sequence(_, ref seq) => {
+            seq.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
+                size.with(lockstep_iter_size(tt, interpolations, repeats))
+            })
+        }
+        TokenTree::MetaVar(_, name) | TokenTree::MetaVarDecl(_, name, _) => {
+            match lookup_cur_matched(name, interpolations, repeats) {
+                Some(matched) => match matched {
+                    MatchedNonterminal(_) => LockstepIterSize::Unconstrained,
+                    MatchedSeq(ref ads, _) => LockstepIterSize::Constraint(ads.len(), name),
+                },
+                _ => LockstepIterSize::Unconstrained,
+            }
+        }
+        TokenTree::Token(..) => LockstepIterSize::Unconstrained,
+    }
+}
diff --git a/src/libsyntax/ext/tt/macro_check.rs b/src/libsyntax/ext/tt/macro_check.rs
deleted file mode 100644
index a1734689595..00000000000
--- a/src/libsyntax/ext/tt/macro_check.rs
+++ /dev/null
@@ -1,626 +0,0 @@
-//! Checks that meta-variables in macro definition are correctly declared and used.
-//!
-//! # What is checked
-//!
-//! ## Meta-variables must not be bound twice
-//!
-//! ```
-//! macro_rules! foo { ($x:tt $x:tt) => { $x }; }
-//! ```
-//!
-//! This check is sound (no false-negative) and complete (no false-positive).
-//!
-//! ## Meta-variables must not be free
-//!
-//! ```
-//! macro_rules! foo { () => { $x }; }
-//! ```
-//!
-//! This check is also done at macro instantiation but only if the branch is taken.
-//!
-//! ## Meta-variables must repeat at least as many times as their binder
-//!
-//! ```
-//! macro_rules! foo { ($($x:tt)*) => { $x }; }
-//! ```
-//!
-//! This check is also done at macro instantiation but only if the branch is taken.
-//!
-//! ## Meta-variables must repeat with the same Kleene operators as their binder
-//!
-//! ```
-//! macro_rules! foo { ($($x:tt)+) => { $($x)* }; }
-//! ```
-//!
-//! This check is not done at macro instantiation.
-//!
-//! # Disclaimer
-//!
-//! In the presence of nested macros (a macro defined in a macro), those checks may have false
-//! positives and false negatives. We try to detect those cases by recognizing potential macro
-//! definitions in RHSes, but nested macros may be hidden through the use of particular values of
-//! meta-variables.
-//!
-//! ## Examples of false positive
-//!
-//! False positives can come from cases where we don't recognize a nested macro, because it depends
-//! on particular values of meta-variables. In the following example, we think both instances of
-//! `$x` are free, which is a correct statement if `$name` is anything but `macro_rules`. But when
-//! `$name` is `macro_rules`, like in the instantiation below, then `$x:tt` is actually a binder of
-//! the nested macro and `$x` is bound to it.
-//!
-//! ```
-//! macro_rules! foo { ($name:ident) => { $name! bar { ($x:tt) => { $x }; } }; }
-//! foo!(macro_rules);
-//! ```
-//!
-//! False positives can also come from cases where we think there is a nested macro while there
-//! isn't. In the following example, we think `$x` is free, which is incorrect because `bar` is not
-//! 
a nested macro since it is not evaluated as code by `stringify!`. -//! -//! ``` -//! macro_rules! foo { () => { stringify!(macro_rules! bar { () => { $x }; }) }; } -//! ``` -//! -//! ## Examples of false negative -//! -//! False negatives can come from cases where we don't recognize a meta-variable, because it depends -//! on particular values of meta-variables. In the following examples, we don't see that if `$d` is -//! instantiated with `$` then `$d z` becomes `$z` in the nested macro definition and is thus a free -//! meta-variable. Note however, that if `foo` is instantiated, then we would check the definition -//! of `bar` and would see the issue. -//! -//! ``` -//! macro_rules! foo { ($d:tt) => { macro_rules! bar { ($y:tt) => { $d z }; } }; } -//! ``` -//! -//! # How it is checked -//! -//! There are 3 main functions: `check_binders`, `check_occurrences`, and `check_nested_macro`. They -//! all need some kind of environment. -//! -//! ## Environments -//! -//! Environments are used to pass information. -//! -//! ### From LHS to RHS -//! -//! When checking a LHS with `check_binders`, we produce (and use) an environment for binders, -//! namely `Binders`. This is a mapping from binder name to information about that binder: the span -//! of the binder for error messages and the stack of Kleene operators under which it was bound in -//! the LHS. -//! -//! This environment is used by both the LHS and RHS. The LHS uses it to detect duplicate binders. -//! The RHS uses it to detect the other errors. -//! -//! ### From outer macro to inner macro -//! -//! When checking the RHS of an outer macro and we detect a nested macro definition, we push the -//! current state, namely `MacroState`, to an environment of nested macro definitions. Each state -//! stores the LHS binders when entering the macro definition as well as the stack of Kleene -//! operators under which the inner macro is defined in the RHS. -//! -//! This environment is a stack representing the nesting of macro definitions. As such, the stack of -//! Kleene operators under which a meta-variable is repeating is the concatenation of the stacks -//! stored when entering a macro definition starting from the state in which the meta-variable is -//! bound. -use crate::ast::NodeId; -use crate::early_buffered_lints::BufferedEarlyLintId; -use crate::ext::tt::quoted::{KleeneToken, TokenTree}; -use crate::parse::token::TokenKind; -use crate::parse::token::{DelimToken, Token}; -use crate::parse::ParseSess; -use crate::symbol::{kw, sym}; - -use rustc_data_structures::fx::FxHashMap; -use smallvec::SmallVec; -use syntax_pos::{symbol::Ident, MultiSpan, Span}; - -/// Stack represented as linked list. -/// -/// Those are used for environments because they grow incrementally and are not mutable. -enum Stack<'a, T> { - /// Empty stack. - Empty, - /// A non-empty stack. - Push { - /// The top element. - top: T, - /// The previous elements. - prev: &'a Stack<'a, T>, - }, -} - -impl<'a, T> Stack<'a, T> { - /// Returns whether a stack is empty. - fn is_empty(&self) -> bool { - match *self { - Stack::Empty => true, - _ => false, - } - } - - /// Returns a new stack with an element of top. - fn push(&'a self, top: T) -> Stack<'a, T> { - Stack::Push { top, prev: self } - } -} - -impl<'a, T> Iterator for &'a Stack<'a, T> { - type Item = &'a T; - - // Iterates from top to bottom of the stack. 
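A standalone version of this linked-list stack shows why it suits nested scopes: each push borrows the parent, so sibling scopes can extend one shared environment without copying or mutation. This is a sketch with invented example values, not the compiler's code:

```rust
// Invented standalone sketch of the borrow-based `Stack` environment.
enum Stack<'a, T> {
    Empty,
    Push { top: T, prev: &'a Stack<'a, T> },
}

impl<'a, T> Stack<'a, T> {
    // A push does not mutate `self`; it returns a new stack borrowing it.
    fn push(&'a self, top: T) -> Stack<'a, T> {
        Stack::Push { top, prev: self }
    }
}

// Walk from the innermost (most recently pushed) element outwards.
fn collect<'a, T: Copy>(mut cur: &'a Stack<'a, T>) -> Vec<T> {
    let mut out = Vec::new();
    loop {
        match cur {
            Stack::Empty => return out,
            Stack::Push { top, prev } => {
                out.push(*top);
                cur = *prev;
            }
        }
    }
}

fn main() {
    let empty = Stack::Empty;
    let outer = empty.push("*");
    // Two sibling scopes extend `outer` independently; neither sees the other.
    let inner_a = outer.push("+");
    let inner_b = outer.push("?");
    assert_eq!(collect(&inner_a), ["+", "*"]);
    assert_eq!(collect(&inner_b), ["?", "*"]);
    println!("sibling scopes share the outer environment");
}
```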
-    fn next(&mut self) -> Option<&'a T> {
-        match *self {
-            Stack::Empty => None,
-            Stack::Push { ref top, ref prev } => {
-                *self = prev;
-                Some(top)
-            }
-        }
-    }
-}
-
-impl From<&Stack<'_, KleeneToken>> for SmallVec<[KleeneToken; 1]> {
-    fn from(ops: &Stack<'_, KleeneToken>) -> SmallVec<[KleeneToken; 1]> {
-        let mut ops: SmallVec<[KleeneToken; 1]> = ops.cloned().collect();
-        // The stack is innermost on top. We want outermost first.
-        ops.reverse();
-        ops
-    }
-}
-
-/// Information attached to a meta-variable binder in LHS.
-struct BinderInfo {
-    /// The span of the meta-variable in LHS.
-    span: Span,
-    /// The stack of Kleene operators (outermost first).
-    ops: SmallVec<[KleeneToken; 1]>,
-}
-
-/// An environment of meta-variables to their binder information.
-type Binders = FxHashMap<Ident, BinderInfo>;
-
-/// The state at which we entered a macro definition in the RHS of another macro definition.
-struct MacroState<'a> {
-    /// The binders of the branch where we entered the macro definition.
-    binders: &'a Binders,
-    /// The stack of Kleene operators (outermost first) where we entered the macro definition.
-    ops: SmallVec<[KleeneToken; 1]>,
-}
-
-/// Checks that meta-variables are used correctly in a macro definition.
-///
-/// Arguments:
-/// - `sess` is used to emit diagnostics and lints
-/// - `node_id` is used to emit lints
-/// - `span` is used when no spans are available
-/// - `lhses` and `rhses` should have the same length and represent the macro definition
-crate fn check_meta_variables(
-    sess: &ParseSess,
-    node_id: NodeId,
-    span: Span,
-    lhses: &[TokenTree],
-    rhses: &[TokenTree],
-) -> bool {
-    if lhses.len() != rhses.len() {
-        sess.span_diagnostic.span_bug(span, "length mismatch between LHSes and RHSes")
-    }
-    let mut valid = true;
-    for (lhs, rhs) in lhses.iter().zip(rhses.iter()) {
-        let mut binders = Binders::default();
-        check_binders(sess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut valid);
-        check_occurrences(sess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut valid);
-    }
-    valid
-}
-
-/// Checks `lhs` as part of the LHS of a macro definition, extends `binders` with new binders, and
-/// sets `valid` to false in case of errors.
-///
-/// Arguments:
-/// - `sess` is used to emit diagnostics and lints
-/// - `node_id` is used to emit lints
-/// - `lhs` is checked as part of a LHS
-/// - `macros` is the stack of possible outer macros
-/// - `binders` contains the binders of the LHS
-/// - `ops` is the stack of Kleene operators from the LHS
-/// - `valid` is set in case of errors
-fn check_binders(
-    sess: &ParseSess,
-    node_id: NodeId,
-    lhs: &TokenTree,
-    macros: &Stack<'_, MacroState<'_>>,
-    binders: &mut Binders,
-    ops: &Stack<'_, KleeneToken>,
-    valid: &mut bool,
-) {
-    match *lhs {
-        TokenTree::Token(..) => {}
-        // This can only happen when checking a nested macro because this LHS is then in the RHS of
-        // the outer macro. See ui/macros/macro-of-higher-order.rs where $y:$fragment in the
-        // LHS of the nested macro (and RHS of the outer macro) is parsed as MetaVar(y) Colon
-        // MetaVar(fragment) and not as MetaVarDecl(y, fragment).
-        TokenTree::MetaVar(span, name) => {
-            if macros.is_empty() {
-                sess.span_diagnostic.span_bug(span, "unexpected MetaVar in lhs");
-            }
-            // There are 3 possibilities:
-            if let Some(prev_info) = binders.get(&name) {
-                // 1. The meta-variable is already bound in the current LHS: This is an error.
- let mut span = MultiSpan::from_span(span); - span.push_span_label(prev_info.span, "previous declaration".into()); - buffer_lint(sess, span, node_id, "duplicate matcher binding"); - } else if get_binder_info(macros, binders, name).is_none() { - // 2. The meta-variable is free: This is a binder. - binders.insert(name, BinderInfo { span, ops: ops.into() }); - } else { - // 3. The meta-variable is bound: This is an occurrence. - check_occurrences(sess, node_id, lhs, macros, binders, ops, valid); - } - } - // Similarly, this can only happen when checking a toplevel macro. - TokenTree::MetaVarDecl(span, name, _kind) => { - if !macros.is_empty() { - sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in nested lhs"); - } - if let Some(prev_info) = get_binder_info(macros, binders, name) { - // Duplicate binders at the top-level macro definition are errors. The lint is only - // for nested macro definitions. - sess.span_diagnostic - .struct_span_err(span, "duplicate matcher binding") - .span_note(prev_info.span, "previous declaration was here") - .emit(); - *valid = false; - } else { - binders.insert(name, BinderInfo { span, ops: ops.into() }); - } - } - TokenTree::Delimited(_, ref del) => { - for tt in &del.tts { - check_binders(sess, node_id, tt, macros, binders, ops, valid); - } - } - TokenTree::Sequence(_, ref seq) => { - let ops = ops.push(seq.kleene); - for tt in &seq.tts { - check_binders(sess, node_id, tt, macros, binders, &ops, valid); - } - } - } -} - -/// Returns the binder information of a meta-variable. -/// -/// Arguments: -/// - `macros` is the stack of possible outer macros -/// - `binders` contains the current binders -/// - `name` is the name of the meta-variable we are looking for -fn get_binder_info<'a>( - mut macros: &'a Stack<'a, MacroState<'a>>, - binders: &'a Binders, - name: Ident, -) -> Option<&'a BinderInfo> { - binders.get(&name).or_else(|| macros.find_map(|state| state.binders.get(&name))) -} - -/// Checks `rhs` as part of the RHS of a macro definition and sets `valid` to false in case of -/// errors. -/// -/// Arguments: -/// - `sess` is used to emit diagnostics and lints -/// - `node_id` is used to emit lints -/// - `rhs` is checked as part of a RHS -/// - `macros` is the stack of possible outer macros -/// - `binders` contains the binders of the associated LHS -/// - `ops` is the stack of Kleene operators from the RHS -/// - `valid` is set in case of errors -fn check_occurrences( - sess: &ParseSess, - node_id: NodeId, - rhs: &TokenTree, - macros: &Stack<'_, MacroState<'_>>, - binders: &Binders, - ops: &Stack<'_, KleeneToken>, - valid: &mut bool, -) { - match *rhs { - TokenTree::Token(..) => {} - TokenTree::MetaVarDecl(span, _name, _kind) => { - sess.span_diagnostic.span_bug(span, "unexpected MetaVarDecl in rhs") - } - TokenTree::MetaVar(span, name) => { - check_ops_is_prefix(sess, node_id, macros, binders, ops, span, name); - } - TokenTree::Delimited(_, ref del) => { - check_nested_occurrences(sess, node_id, &del.tts, macros, binders, ops, valid); - } - TokenTree::Sequence(_, ref seq) => { - let ops = ops.push(seq.kleene); - check_nested_occurrences(sess, node_id, &seq.tts, macros, binders, &ops, valid); - } - } -} - -/// Represents the processed prefix of a nested macro. -#[derive(Clone, Copy, PartialEq, Eq)] -enum NestedMacroState { - /// Nothing that matches a nested macro definition was processed yet. - Empty, - /// The token `macro_rules` was processed. - MacroRules, - /// The tokens `macro_rules!` were processed. 
- MacroRulesNot, - /// The tokens `macro_rules!` followed by a name were processed. The name may be either directly - /// an identifier or a meta-variable (that hopefully would be instantiated by an identifier). - MacroRulesNotName, - /// The keyword `macro` was processed. - Macro, - /// The keyword `macro` followed by a name was processed. - MacroName, - /// The keyword `macro` followed by a name and a token delimited by parentheses was processed. - MacroNameParen, -} - -/// Checks `tts` as part of the RHS of a macro definition, tries to recognize nested macro -/// definitions, and sets `valid` to false in case of errors. -/// -/// Arguments: -/// - `sess` is used to emit diagnostics and lints -/// - `node_id` is used to emit lints -/// - `tts` is checked as part of a RHS and may contain macro definitions -/// - `macros` is the stack of possible outer macros -/// - `binders` contains the binders of the associated LHS -/// - `ops` is the stack of Kleene operators from the RHS -/// - `valid` is set in case of errors -fn check_nested_occurrences( - sess: &ParseSess, - node_id: NodeId, - tts: &[TokenTree], - macros: &Stack<'_, MacroState<'_>>, - binders: &Binders, - ops: &Stack<'_, KleeneToken>, - valid: &mut bool, -) { - let mut state = NestedMacroState::Empty; - let nested_macros = macros.push(MacroState { binders, ops: ops.into() }); - let mut nested_binders = Binders::default(); - for tt in tts { - match (state, tt) { - ( - NestedMacroState::Empty, - &TokenTree::Token(Token { kind: TokenKind::Ident(name, false), .. }), - ) => { - if name == sym::macro_rules { - state = NestedMacroState::MacroRules; - } else if name == kw::Macro { - state = NestedMacroState::Macro; - } - } - ( - NestedMacroState::MacroRules, - &TokenTree::Token(Token { kind: TokenKind::Not, .. }), - ) => { - state = NestedMacroState::MacroRulesNot; - } - ( - NestedMacroState::MacroRulesNot, - &TokenTree::Token(Token { kind: TokenKind::Ident(..), .. }), - ) => { - state = NestedMacroState::MacroRulesNotName; - } - (NestedMacroState::MacroRulesNot, &TokenTree::MetaVar(..)) => { - state = NestedMacroState::MacroRulesNotName; - // We check that the meta-variable is correctly used. - check_occurrences(sess, node_id, tt, macros, binders, ops, valid); - } - (NestedMacroState::MacroRulesNotName, &TokenTree::Delimited(_, ref del)) - | (NestedMacroState::MacroName, &TokenTree::Delimited(_, ref del)) - if del.delim == DelimToken::Brace => - { - let legacy = state == NestedMacroState::MacroRulesNotName; - state = NestedMacroState::Empty; - let rest = - check_nested_macro(sess, node_id, legacy, &del.tts, &nested_macros, valid); - // If we did not check the whole macro definition, then check the rest as if outside - // the macro definition. - check_nested_occurrences( - sess, - node_id, - &del.tts[rest..], - macros, - binders, - ops, - valid, - ); - } - ( - NestedMacroState::Macro, - &TokenTree::Token(Token { kind: TokenKind::Ident(..), .. }), - ) => { - state = NestedMacroState::MacroName; - } - (NestedMacroState::Macro, &TokenTree::MetaVar(..)) => { - state = NestedMacroState::MacroName; - // We check that the meta-variable is correctly used. 
-                check_occurrences(sess, node_id, tt, macros, binders, ops, valid);
-            }
-            (NestedMacroState::MacroName, &TokenTree::Delimited(_, ref del))
-                if del.delim == DelimToken::Paren =>
-            {
-                state = NestedMacroState::MacroNameParen;
-                nested_binders = Binders::default();
-                check_binders(
-                    sess,
-                    node_id,
-                    tt,
-                    &nested_macros,
-                    &mut nested_binders,
-                    &Stack::Empty,
-                    valid,
-                );
-            }
-            (NestedMacroState::MacroNameParen, &TokenTree::Delimited(_, ref del))
-                if del.delim == DelimToken::Brace =>
-            {
-                state = NestedMacroState::Empty;
-                check_occurrences(
-                    sess,
-                    node_id,
-                    tt,
-                    &nested_macros,
-                    &nested_binders,
-                    &Stack::Empty,
-                    valid,
-                );
-            }
-            (_, ref tt) => {
-                state = NestedMacroState::Empty;
-                check_occurrences(sess, node_id, tt, macros, binders, ops, valid);
-            }
-        }
-    }
-}
-
-/// Checks the body of a nested macro, returns where the check stopped, and sets `valid` to false
-/// in case of errors.
-///
-/// The token trees are checked as long as they look like a list of (LHS) => {RHS} token trees. This
-/// check is a best-effort to detect a macro definition. It returns the position in `tts` where we
-/// stopped checking because we detected we were not in a macro definition anymore.
-///
-/// Arguments:
-/// - `sess` is used to emit diagnostics and lints
-/// - `node_id` is used to emit lints
-/// - `legacy` specifies whether the macro is legacy
-/// - `tts` is checked as a list of (LHS) => {RHS}
-/// - `macros` is the stack of outer macros
-/// - `valid` is set in case of errors
-fn check_nested_macro(
-    sess: &ParseSess,
-    node_id: NodeId,
-    legacy: bool,
-    tts: &[TokenTree],
-    macros: &Stack<'_, MacroState<'_>>,
-    valid: &mut bool,
-) -> usize {
-    let n = tts.len();
-    let mut i = 0;
-    let separator = if legacy { TokenKind::Semi } else { TokenKind::Comma };
-    loop {
-        // We expect 3 token trees: `(LHS) => {RHS}`. The separator is checked after.
-        if i + 2 >= n
-            || !tts[i].is_delimited()
-            || !tts[i + 1].is_token(&TokenKind::FatArrow)
-            || !tts[i + 2].is_delimited()
-        {
-            break;
-        }
-        let lhs = &tts[i];
-        let rhs = &tts[i + 2];
-        let mut binders = Binders::default();
-        check_binders(sess, node_id, lhs, macros, &mut binders, &Stack::Empty, valid);
-        check_occurrences(sess, node_id, rhs, macros, &binders, &Stack::Empty, valid);
-        // Since the last semicolon is optional for legacy macros and `decl_macro`s are not
-        // terminated, we increment our checked position by how many token trees we already
-        // checked (the 3 above) before checking for the separator.
-        i += 3;
-        if i == n || !tts[i].is_token(&separator) {
-            break;
-        }
-        // We increment our checked position for the semicolon.
-        i += 1;
-    }
-    i
-}
-
-/// Checks that a meta-variable occurrence is valid.
-///
-/// Arguments:
-/// - `sess` is used to emit diagnostics and lints
-/// - `node_id` is used to emit lints
-/// - `macros` is the stack of possible outer macros
-/// - `binders` contains the binders of the associated LHS
-/// - `ops` is the stack of Kleene operators from the RHS
-/// - `span` is the span of the meta-variable to check
-/// - `name` is the name of the meta-variable to check
-fn check_ops_is_prefix(
-    sess: &ParseSess,
-    node_id: NodeId,
-    macros: &Stack<'_, MacroState<'_>>,
-    binders: &Binders,
-    ops: &Stack<'_, KleeneToken>,
-    span: Span,
-    name: Ident,
-) {
-    let macros = macros.push(MacroState { binders, ops: ops.into() });
-    // Accumulates the operator stacks of each state until (and including when) the
-    // meta-variable is found. The innermost stack is first.
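What `ops_is_prefix` (defined a little further down) enforces reduces to a prefix test over Kleene stacks: the binder's stack must be a prefix, operator for operator, of the occurrence's stack. A detached sketch with plain `char` operators in place of `KleeneToken`; the function name and error strings are invented:

```rust
// Invented sketch of the "binder ops are a prefix of occurrence ops" rule.
fn is_valid_occurrence(binder_ops: &[char], occurrence_ops: &[char]) -> Result<(), String> {
    for (i, b) in binder_ops.iter().enumerate() {
        match occurrence_ops.get(i) {
            // The occurrence is not nested deeply enough.
            None => return Err(format!("still repeating at depth {}", i)),
            // Same depth, but a different Kleene operator.
            Some(o) if o != b => {
                return Err(format!("Kleene operator mismatch: {} vs {}", b, o))
            }
            Some(_) => {}
        }
    }
    Ok(())
}

fn main() {
    // In `( $( $i:ident = $($j:ident),+ );* ) => { $($( $i += $j; )+)* }`,
    // `$i` is bound under ['*'] and occurs under ['*', '+']: a valid prefix.
    assert!(is_valid_occurrence(&['*'], &['*', '+']).is_ok());
    // A binder under ['*', '+'] used under ['*'] alone lacks depth.
    assert!(is_valid_occurrence(&['*', '+'], &['*']).is_err());
    // Same depth but different operators: `+` at the binder, `*` at the use.
    assert!(is_valid_occurrence(&['+'], &['*']).is_err());
    println!("prefix checks behave as expected");
}
```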
-    let mut acc: SmallVec<[&SmallVec<[KleeneToken; 1]>; 1]> = SmallVec::new();
-    for state in &macros {
-        acc.push(&state.ops);
-        if let Some(binder) = state.binders.get(&name) {
-            // This variable concatenates the stack of operators from the RHS of the LHS where the
-            // meta-variable was defined to where it is used (in possibly nested macros). The
-            // outermost operator is first.
-            let mut occurrence_ops: SmallVec<[KleeneToken; 2]> = SmallVec::new();
-            // We need to iterate from the end to start with outermost stack.
-            for ops in acc.iter().rev() {
-                occurrence_ops.extend_from_slice(ops);
-            }
-            ops_is_prefix(sess, node_id, span, name, &binder.ops, &occurrence_ops);
-            return;
-        }
-    }
-    buffer_lint(sess, span.into(), node_id, &format!("unknown macro variable `{}`", name));
-}
-
-/// Returns whether `binder_ops` is a prefix of `occurrence_ops`.
-///
-/// The stack of Kleene operators of a meta-variable occurrence just needs to have the stack of
-/// Kleene operators of its binder as a prefix.
-///
-/// Consider $i in the following example:
-///
-/// ( $( $i:ident = $($j:ident),+ );* ) => { $($( $i += $j; )+)* }
-///
-/// It occurs under the Kleene stack ["*", "+"] and is bound under ["*"] only.
-///
-/// Arguments:
-/// - `sess` is used to emit diagnostics and lints
-/// - `node_id` is used to emit lints
-/// - `span` is the span of the meta-variable being checked
-/// - `name` is the name of the meta-variable being checked
-/// - `binder_ops` is the stack of Kleene operators for the binder
-/// - `occurrence_ops` is the stack of Kleene operators for the occurrence
-fn ops_is_prefix(
-    sess: &ParseSess,
-    node_id: NodeId,
-    span: Span,
-    name: Ident,
-    binder_ops: &[KleeneToken],
-    occurrence_ops: &[KleeneToken],
-) {
-    for (i, binder) in binder_ops.iter().enumerate() {
-        if i >= occurrence_ops.len() {
-            let mut span = MultiSpan::from_span(span);
-            span.push_span_label(binder.span, "expected repetition".into());
-            let message = &format!("variable '{}' is still repeating at this depth", name);
-            buffer_lint(sess, span, node_id, message);
-            return;
-        }
-        let occurrence = &occurrence_ops[i];
-        if occurrence.op != binder.op {
-            let mut span = MultiSpan::from_span(span);
-            span.push_span_label(binder.span, "expected repetition".into());
-            span.push_span_label(occurrence.span, "conflicting repetition".into());
-            let message = "meta-variable repeats with different Kleene operator";
-            buffer_lint(sess, span, node_id, message);
-            return;
-        }
-    }
-}
-
-fn buffer_lint(sess: &ParseSess, span: MultiSpan, node_id: NodeId, message: &str) {
-    sess.buffer_lint(BufferedEarlyLintId::MetaVariableMisuse, span, node_id, message);
-}
diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
deleted file mode 100644
index a34a0344f27..00000000000
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ /dev/null
@@ -1,952 +0,0 @@
-//! This is an NFA-based parser, which calls out to the main rust parser for named non-terminals
-//! (which it commits to fully when it hits one in a grammar). There's a set of current NFA threads
-//! and a set of next ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
-//! pathological cases, is worse than traditional use of NFA or Earley parsing, but it's an easier
-//! fit for Macro-by-Example-style rules.
-//!
-//! (In order to prevent the pathological case, we'd need to lazily construct the resulting
-//! `NamedMatch`es at the very end. It'd be a pain, and require more memory to keep around old
-//! items, but it would also save overhead)
-//!
-//! We don't say this parser uses the Earley algorithm, because it's unnecessarily inaccurate. -//! The macro parser restricts itself to the features of finite state automata. Earley parsers -//! can be described as an extension of NFAs with completion rules, prediction rules, and recursion. -//! -//! Quick intro to how the parser works: -//! -//! A 'position' is a dot in the middle of a matcher, usually represented as a -//! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. -//! -//! The parser walks through the input a character at a time, maintaining a list -//! of threads consistent with the current position in the input string: `cur_items`. -//! -//! As it processes them, it fills up `eof_items` with threads that would be valid if -//! the macro invocation is now over, `bb_items` with threads that are waiting on -//! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting -//! on a particular token. Most of the logic concerns moving the · through the -//! repetitions indicated by Kleene stars. The rules for moving the · without -//! consuming any input are called epsilon transitions. It only advances or calls -//! out to the real Rust parser when no `cur_items` threads remain. -//! -//! Example: -//! -//! ```text, ignore -//! Start parsing a a a a b against [· a $( a )* a b]. -//! -//! Remaining input: a a a a b -//! next: [· a $( a )* a b] -//! -//! - - - Advance over an a. - - - -//! -//! Remaining input: a a a b -//! cur: [a · $( a )* a b] -//! Descend/Skip (first item). -//! next: [a $( · a )* a b] [a $( a )* · a b]. -//! -//! - - - Advance over an a. - - - -//! -//! Remaining input: a a b -//! cur: [a $( a · )* a b] [a $( a )* a · b] -//! Follow epsilon transition: Finish/Repeat (first item) -//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] -//! -//! - - - Advance over an a. - - - (this looks exactly like the last step) -//! -//! Remaining input: a b -//! cur: [a $( a · )* a b] [a $( a )* a · b] -//! Follow epsilon transition: Finish/Repeat (first item) -//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] -//! -//! - - - Advance over an a. - - - (this looks exactly like the last step) -//! -//! Remaining input: b -//! cur: [a $( a · )* a b] [a $( a )* a · b] -//! Follow epsilon transition: Finish/Repeat (first item) -//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] -//! -//! - - - Advance over a b. - - - -//! -//! Remaining input: '' -//! eof: [a $( a )* a b ·] -//! ``` - -crate use NamedMatch::*; -crate use ParseResult::*; -use TokenTreeOrTokenTreeSlice::*; - -use crate::ast::{Ident, Name}; -use crate::ext::tt::quoted::{self, TokenTree}; -use crate::parse::{Directory, ParseSess}; -use crate::parse::parser::{Parser, PathStyle}; -use crate::parse::token::{self, DocComment, Nonterminal, Token}; -use crate::print::pprust; -use crate::symbol::{kw, sym, Symbol}; -use crate::tokenstream::{DelimSpan, TokenStream}; - -use errors::FatalError; -use smallvec::{smallvec, SmallVec}; -use syntax_pos::Span; - -use rustc_data_structures::fx::FxHashMap; -use rustc_data_structures::sync::Lrc; -use std::collections::hash_map::Entry::{Occupied, Vacant}; -use std::mem; -use std::ops::{Deref, DerefMut}; - -// To avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body. - -/// Either a sequence of token trees or a single one. This is used as the representation of the -/// sequence of tokens that make up a matcher. 
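The thread-stepping traced in the module comment above fits in a small standalone model. The toy matcher below (an invented `Piece` representation; the real parser additionally records matches, separators, and nested delimiters) replays the `a $( a )* a b` example by advancing a set of dot positions one character at a time:

```rust
// Invented toy model of the dot-position stepping described above.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Piece {
    Lit(char),
    Star(char), // `$( c )*`
}

fn matches(matcher: &[Piece], input: &str) -> bool {
    // A thread is a dot position: the index of the piece the dot is before.
    // Epsilon closure: a dot before a `Star` may also skip past it.
    let closure = |set: &mut Vec<usize>| {
        let mut i = 0;
        while i < set.len() {
            let pos = set[i];
            if let Some(Piece::Star(_)) = matcher.get(pos) {
                if !set.contains(&(pos + 1)) {
                    set.push(pos + 1); // skip the repetition entirely
                }
            }
            i += 1;
        }
    };

    let mut cur = vec![0];
    closure(&mut cur);
    for c in input.chars() {
        let mut next = Vec::new();
        for &pos in &cur {
            match matcher.get(pos) {
                // A literal consumes the character and advances the dot.
                Some(Piece::Lit(l)) if *l == c => {
                    if !next.contains(&(pos + 1)) {
                        next.push(pos + 1);
                    }
                }
                // A repetition consumes the character and may repeat again.
                Some(Piece::Star(l)) if *l == c => {
                    if !next.contains(&pos) {
                        next.push(pos);
                    }
                }
                _ => {}
            }
        }
        closure(&mut next);
        cur = next;
    }
    // Accept if some thread reached the end of the matcher.
    cur.contains(&matcher.len())
}

fn main() {
    // `a $( a )* a b`
    let m = [Piece::Lit('a'), Piece::Star('a'), Piece::Lit('a'), Piece::Lit('b')];
    assert!(matches(&m, "aaaab")); // the trace from the comment above
    assert!(matches(&m, "aab")); // zero repetitions of the star
    assert!(!matches(&m, "ab")); // needs at least two `a`s
    println!("toy NFA agrees with the worked trace");
}
```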
-#[derive(Clone)]
-enum TokenTreeOrTokenTreeSlice<'tt> {
-    Tt(TokenTree),
-    TtSeq(&'tt [TokenTree]),
-}
-
-impl<'tt> TokenTreeOrTokenTreeSlice<'tt> {
-    /// Returns the number of constituent top-level token trees of `self` (top-level in that it
-    /// will not recursively descend into subtrees).
-    fn len(&self) -> usize {
-        match *self {
-            TtSeq(ref v) => v.len(),
-            Tt(ref tt) => tt.len(),
-        }
-    }
-
-    /// The `index`-th token tree of `self`.
-    fn get_tt(&self, index: usize) -> TokenTree {
-        match *self {
-            TtSeq(ref v) => v[index].clone(),
-            Tt(ref tt) => tt.get_tt(index),
-        }
-    }
-}
-
-/// An unzipping of `TokenTree`s... see the `stack` field of `MatcherPos`.
-///
-/// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have
-/// descended into.
-#[derive(Clone)]
-struct MatcherTtFrame<'tt> {
-    /// The "parent" matcher that we are descending into.
-    elts: TokenTreeOrTokenTreeSlice<'tt>,
-    /// The position of the "dot" in `elts` at the time we descended.
-    idx: usize,
-}
-
-type NamedMatchVec = SmallVec<[NamedMatch; 4]>;
-
-/// Represents a single "position" (aka "matcher position", aka "item"), as
-/// described in the module documentation.
-///
-/// Here:
-///
-/// - `'root` represents the lifetime of the stack slot that holds the root
-///   `MatcherPos`. As described in `MatcherPosHandle`, the root `MatcherPos`
-///   structure is stored on the stack, but subsequent instances are put into
-///   the heap.
-/// - `'tt` represents the lifetime of the token trees that this matcher
-///   position refers to.
-///
-/// It is important to distinguish these two lifetimes because we have a
-/// `SmallVec<TokenTreeOrTokenTreeSlice<'tt>>` below, and the destructor of
-/// that is considered to possibly access the data from its elements (it lacks
-/// a `#[may_dangle]` attribute). As a result, the compiler needs to know that
-/// all the elements in that `SmallVec` strictly outlive the root stack slot
-/// lifetime. By separating `'tt` from `'root`, we can show that.
-#[derive(Clone)]
-struct MatcherPos<'root, 'tt> {
-    /// The token or sequence of tokens that make up the matcher
-    top_elts: TokenTreeOrTokenTreeSlice<'tt>,
-
-    /// The position of the "dot" in this matcher
-    idx: usize,
-
-    /// The first span of source that the beginning of this matcher corresponds to. In other
-    /// words, the token in the source whose span is `sp_open` is matched against the first token of
-    /// the matcher.
-    sp_open: Span,
-
-    /// For each named metavar in the matcher, we keep track of token trees matched against the
-    /// metavar by the black box parser. In particular, there may be more than one match per
-    /// metavar if we are in a repetition (each repetition matches each of the variables).
-    /// Moreover, matchers and repetitions can be nested; the `matches` field is shared (hence the
-    /// `Rc`) among all "nested" matchers. `match_lo`, `match_cur`, and `match_hi` keep track of
-    /// the current position of the `self` matcher position in the shared `matches` list.
-    ///
-    /// Also, note that while we are descending into a sequence, matchers are given their own
-    /// `matches` vector. Only once we reach the end of a full repetition of the sequence do we add
-    /// all bound matches from the submatcher into the shared top-level `matches` vector. If `sep`
-    /// and `up` are `Some`, then `matches` is _not_ the shared top-level list. Instead, if one
-    /// wants the shared `matches`, one should use `up.matches`.
-    matches: Box<[Lrc<NamedMatchVec>]>,
-    /// The position in `matches` corresponding to the first metavar in this matcher's sequence of
-    /// token trees. In other words, the first metavar in the first token of `top_elts` corresponds
-    /// to `matches[match_lo]`.
-    match_lo: usize,
-    /// The position in `matches` corresponding to the metavar we are currently trying to match
-    /// against the source token stream. `match_lo <= match_cur <= match_hi`.
-    match_cur: usize,
-    /// Similar to `match_lo` except `match_hi` is the position in `matches` of the _last_ metavar
-    /// in this matcher.
-    match_hi: usize,
-
-    // The following fields are used if we are matching a repetition. If we aren't, they should be
-    // `None`.
-
-    /// The KleeneOp of this sequence if we are in a repetition.
-    seq_op: Option<quoted::KleeneOp>,
-
-    /// The separator if we are in a repetition.
-    sep: Option<Token>,
-
-    /// The "parent" matcher position if we are in a repetition. That is, the matcher position just
-    /// before we enter the sequence.
-    up: Option<MatcherPosHandle<'root, 'tt>>,
-
-    /// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from
-    /// a delimited token tree (e.g., something wrapped in `(` `)`) or to get the contents of a doc
-    /// comment...
-    ///
-    /// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. )
-    /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
-    /// that where the bottom of the stack is the outermost matcher.
-    /// Also, throughout the comments, this "descent" is often referred to as "unzipping"...
-    stack: SmallVec<[MatcherTtFrame<'tt>; 1]>,
-}
-
-impl<'root, 'tt> MatcherPos<'root, 'tt> {
-    /// Adds `m` as a named match for the `idx`-th metavar.
-    fn push_match(&mut self, idx: usize, m: NamedMatch) {
-        let matches = Lrc::make_mut(&mut self.matches[idx]);
-        matches.push(m);
-    }
-}
-
-// Lots of MatcherPos instances are created at runtime. Allocating them on the
-// heap is slow. Furthermore, using SmallVec<MatcherPos> to allocate them all
-// on the stack is also slow, because MatcherPos is quite a large type and
-// instances get moved around a lot between vectors, which requires lots of
-// slow memcpy calls.
-//
-// Therefore, the initial MatcherPos is always allocated on the stack,
-// subsequent ones (of which there aren't that many) are allocated on the heap,
-// and this type is used to encapsulate both cases.
-enum MatcherPosHandle<'root, 'tt> {
-    Ref(&'root mut MatcherPos<'root, 'tt>),
-    Box(Box<MatcherPos<'root, 'tt>>),
-}
-
-impl<'root, 'tt> Clone for MatcherPosHandle<'root, 'tt> {
-    // This always produces a new Box.
-    fn clone(&self) -> Self {
-        MatcherPosHandle::Box(match *self {
-            MatcherPosHandle::Ref(ref r) => Box::new((**r).clone()),
-            MatcherPosHandle::Box(ref b) => b.clone(),
-        })
-    }
-}
-
-impl<'root, 'tt> Deref for MatcherPosHandle<'root, 'tt> {
-    type Target = MatcherPos<'root, 'tt>;
-    fn deref(&self) -> &Self::Target {
-        match *self {
-            MatcherPosHandle::Ref(ref r) => r,
-            MatcherPosHandle::Box(ref b) => b,
-        }
-    }
-}
-
-impl<'root, 'tt> DerefMut for MatcherPosHandle<'root, 'tt> {
-    fn deref_mut(&mut self) -> &mut MatcherPos<'root, 'tt> {
-        match *self {
-            MatcherPosHandle::Ref(ref mut r) => r,
-            MatcherPosHandle::Box(ref mut b) => b,
-        }
-    }
-}
-
-/// Represents the possible results of an attempted parse.
-crate enum ParseResult<T> {
-    /// Parsed successfully.
-    Success(T),
-    /// Arm failed to match. If the second parameter is `token::Eof`, it indicates an unexpected
-    /// end of macro invocation. Otherwise, it indicates that no rules expected the given token.
-    Failure(Token, &'static str),
-    /// Fatal error (malformed macro?). Abort compilation.
-    Error(syntax_pos::Span, String),
-}
-
-/// A `ParseResult` where the `Success` variant contains a mapping of `Ident`s to `NamedMatch`es.
-/// This represents the mapping of metavars to the token trees they bind to.
-crate type NamedParseResult = ParseResult<FxHashMap<Ident, NamedMatch>>;
-
-/// Count how many metavars are named in the given matcher `ms`.
-crate fn count_names(ms: &[TokenTree]) -> usize {
-    ms.iter().fold(0, |count, elt| {
-        count + match *elt {
-            TokenTree::Sequence(_, ref seq) => seq.num_captures,
-            TokenTree::Delimited(_, ref delim) => count_names(&delim.tts),
-            TokenTree::MetaVar(..) => 0,
-            TokenTree::MetaVarDecl(..) => 1,
-            TokenTree::Token(..) => 0,
-        }
-    })
-}
-
-/// `len` `Vec`s (initially shared and empty) that will store matches of metavars.
-fn create_matches(len: usize) -> Box<[Lrc<NamedMatchVec>]> {
-    if len == 0 {
-        vec![]
-    } else {
-        let empty_matches = Lrc::new(SmallVec::new());
-        vec![empty_matches; len]
-    }.into_boxed_slice()
-}
-
-/// Generates the top-level matcher position in which the "dot" is before the first token of the
-/// matcher `ms` and we are going to start matching at the span `open` in the source.
-fn initial_matcher_pos<'root, 'tt>(ms: &'tt [TokenTree], open: Span) -> MatcherPos<'root, 'tt> {
-    let match_idx_hi = count_names(ms);
-    let matches = create_matches(match_idx_hi);
-    MatcherPos {
-        // Start with the top level matcher given to us
-        top_elts: TtSeq(ms), // "elts" is an abbr. for "elements"
-        // The "dot" is before the first token of the matcher
-        idx: 0,
-        // We start matching at the span `open` in the source code
-        sp_open: open,
-
-        // Initialize `matches` to a bunch of empty `Vec`s -- one for each metavar in `top_elts`.
-        // `match_lo` for `top_elts` is 0 and `match_hi` is `matches.len()`. `match_cur` is 0 since
-        // we haven't actually matched anything yet.
-        matches,
-        match_lo: 0,
-        match_cur: 0,
-        match_hi: match_idx_hi,
-
-        // Haven't descended into any delimiters, so empty stack
-        stack: smallvec![],
-
-        // Haven't descended into any sequences, so these are all `None`.
-        seq_op: None,
-        sep: None,
-        up: None,
-    }
-}
-
-/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
-/// so it is associated with a single ident in a parse, and all
-/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
-/// (expr, item, etc). Each leaf in a single `NamedMatch` corresponds to a
-/// single `token::MATCH_NONTERMINAL` in the `TokenTree` that produced it.
-///
-/// The in-memory structure of a particular `NamedMatch` represents the match
-/// that occurred when a particular subset of a matcher was applied to a
-/// particular token tree.
-///
-/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
-/// the `MatchedNonterminal`s, will depend on the token tree it was applied
-/// to: each `MatchedSeq` corresponds to a single `TTSeq` in the originating
-/// token tree. The depth of the `NamedMatch` structure will therefore depend
-/// only on the nesting depth of `ast::TTSeq`s in the originating
-/// token tree it was derived from.
-#[derive(Debug, Clone)]
-crate enum NamedMatch {
-    MatchedSeq(Lrc<NamedMatchVec>, DelimSpan),
-    MatchedNonterminal(Lrc<Nonterminal>),
-}
-
-/// Takes a sequence of token trees `ms` representing a matcher which successfully matched input
-/// and an iterator of items that matched input and produces a `NamedParseResult`.
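As a rough illustration of what `nameize` (defined next) does with that iterator of matches, here is a much-simplified toy over strings; the helper name and types are invented, not the compiler's signature:

```rust
use std::collections::HashMap;

// Invented toy of `nameize`: pair binder names, in matcher order, with the
// next match produced by the parser, rejecting duplicate bind names.
fn nameize_toy<I: Iterator<Item = String>>(
    names: &[&str],
    mut matches: I,
) -> Result<HashMap<String, String>, String> {
    let mut out = HashMap::new();
    for &name in names {
        let m = matches.next().ok_or("ran out of matches")?;
        if out.insert(name.to_string(), m).is_some() {
            return Err(format!("duplicated bind name: {}", name));
        }
    }
    Ok(out)
}

fn main() {
    // Matcher `($e:expr, $i:ident)` applied to `(1 + 2, x)`, conceptually.
    let map = nameize_toy(
        &["e", "i"],
        vec!["1 + 2".to_string(), "x".to_string()].into_iter(),
    )
    .unwrap();
    assert_eq!(map["e"], "1 + 2");
    assert_eq!(map["i"], "x");

    // A matcher that binds the same name twice is rejected.
    let dup = nameize_toy(&["x", "x"], vec!["a".to_string(), "b".to_string()].into_iter());
    assert!(dup.is_err());
    println!("{:?}", map);
}
```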
-fn nameize<I: Iterator<Item = NamedMatch>>(
-    sess: &ParseSess,
-    ms: &[TokenTree],
-    mut res: I,
-) -> NamedParseResult {
-    // Recursively descend into each type of matcher (e.g., sequences, delimited, metavars) and make
-    // sure that each metavar has _exactly one_ binding. If a metavar does not have exactly one
-    // binding, then there is an error. If it does, then we insert the binding into the
-    // `NamedParseResult`.
-    fn n_rec<I: Iterator<Item = NamedMatch>>(
-        sess: &ParseSess,
-        m: &TokenTree,
-        res: &mut I,
-        ret_val: &mut FxHashMap<Ident, NamedMatch>,
-    ) -> Result<(), (syntax_pos::Span, String)> {
-        match *m {
-            TokenTree::Sequence(_, ref seq) => for next_m in &seq.tts {
-                n_rec(sess, next_m, res.by_ref(), ret_val)?
-            },
-            TokenTree::Delimited(_, ref delim) => for next_m in &delim.tts {
-                n_rec(sess, next_m, res.by_ref(), ret_val)?;
-            },
-            TokenTree::MetaVarDecl(span, _, id) if id.name == kw::Invalid => {
-                if sess.missing_fragment_specifiers.borrow_mut().remove(&span) {
-                    return Err((span, "missing fragment specifier".to_string()));
-                }
-            }
-            TokenTree::MetaVarDecl(sp, bind_name, _) => {
-                match ret_val.entry(bind_name) {
-                    Vacant(spot) => {
-                        spot.insert(res.next().unwrap());
-                    }
-                    Occupied(..) => {
-                        return Err((sp, format!("duplicated bind name: {}", bind_name)))
-                    }
-                }
-            }
-            TokenTree::MetaVar(..) | TokenTree::Token(..) => (),
-        }
-
-        Ok(())
-    }
-
-    let mut ret_val = FxHashMap::default();
-    for m in ms {
-        match n_rec(sess, m, res.by_ref(), &mut ret_val) {
-            Ok(_) => {}
-            Err((sp, msg)) => return Error(sp, msg),
-        }
-    }
-
-    Success(ret_val)
-}
-
-/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
-/// other tokens, this is "unexpected token...".
-crate fn parse_failure_msg(tok: &Token) -> String {
-    match tok.kind {
-        token::Eof => "unexpected end of macro invocation".to_string(),
-        _ => format!(
-            "no rules expected the token `{}`",
-            pprust::token_to_string(tok)
-        ),
-    }
-}
-
-/// Performs a token equality check, ignoring syntax context (that is, an unhygienic comparison)
-fn token_name_eq(t1: &Token, t2: &Token) -> bool {
-    if let (Some((ident1, is_raw1)), Some((ident2, is_raw2))) = (t1.ident(), t2.ident()) {
-        ident1.name == ident2.name && is_raw1 == is_raw2
-    } else if let (Some(ident1), Some(ident2)) = (t1.lifetime(), t2.lifetime()) {
-        ident1.name == ident2.name
-    } else {
-        t1.kind == t2.kind
-    }
-}
-
-/// Process the matcher positions of `cur_items` until it is empty. In the process, this will
-/// produce more items in `next_items`, `eof_items`, and `bb_items`.
-///
-/// For more info about how this happens, see the module-level doc comments and the inline
-/// comments of this function.
-///
-/// # Parameters
-///
-/// - `sess`: the parsing session into which errors are emitted.
-/// - `cur_items`: the set of current items to be processed. This should be empty by the end of a
-///   successful execution of this function.
-/// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in
-///   the function `parse`.
-/// - `eof_items`: the set of items that would be valid if this was the EOF.
-/// - `bb_items`: the set of items that are waiting for the black-box parser.
-/// - `token`: the current token of the parser.
-/// - `span`: the `Span` in the source code corresponding to the token trees we are trying to match
-///   against the matcher positions in `cur_items`.
-///
-/// # Returns
-///
-/// A `ParseResult`. Note that matches are kept track of through the items generated.
-fn inner_parse_loop<'root, 'tt>(
-    sess: &ParseSess,
-    cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
-    next_items: &mut Vec<MatcherPosHandle<'root, 'tt>>,
-    eof_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
-    bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>,
-    token: &Token,
-) -> ParseResult<()> {
-    // Pop items from `cur_items` until it is empty.
-    while let Some(mut item) = cur_items.pop() {
-        // When unzipped trees end, remove them. This corresponds to backtracking out of a
-        // delimited submatcher into which we already descended. In backtracking out again, we need
-        // to advance the "dot" past the delimiters in the outer matcher.
-        while item.idx >= item.top_elts.len() {
-            match item.stack.pop() {
-                Some(MatcherTtFrame { elts, idx }) => {
-                    item.top_elts = elts;
-                    item.idx = idx + 1;
-                }
-                None => break,
-            }
-        }
-
-        // Get the current position of the "dot" (`idx`) in `item` and the number of token trees in
-        // the matcher (`len`).
-        let idx = item.idx;
-        let len = item.top_elts.len();
-
-        // If `idx >= len`, then we are at or past the end of the matcher of `item`.
-        if idx >= len {
-            // We are repeating iff there is a parent. If the matcher is inside of a repetition,
-            // then we could be at the end of a sequence or at the beginning of the next
-            // repetition.
-            if item.up.is_some() {
-                // At this point, regardless of whether there is a separator, we should add all
-                // matches from the complete repetition of the sequence to the shared, top-level
-                // `matches` list (actually, `up.matches`, which could itself not be the top-level,
-                // but anyway...). Moreover, we add another item to `cur_items` in which the "dot"
-                // is at the end of the `up` matcher. This ensures that the "dot" in the `up`
-                // matcher is also advanced sufficiently.
-                //
-                // NOTE: removing the condition `idx == len` allows trailing separators.
-                if idx == len {
-                    // Get the `up` matcher
-                    let mut new_pos = item.up.clone().unwrap();
-
-                    // Add matches from this repetition to the `matches` of `up`
-                    for idx in item.match_lo..item.match_hi {
-                        let sub = item.matches[idx].clone();
-                        let span = DelimSpan::from_pair(item.sp_open, token.span);
-                        new_pos.push_match(idx, MatchedSeq(sub, span));
-                    }
-
-                    // Move the "dot" past the repetition in `up`
-                    new_pos.match_cur = item.match_hi;
-                    new_pos.idx += 1;
-                    cur_items.push(new_pos);
-                }
-
-                // Check if we need a separator.
-                if idx == len && item.sep.is_some() {
-                    // We have a separator, and it is the current token. We can advance past the
-                    // separator token.
-                    if item.sep
-                        .as_ref()
-                        .map(|sep| token_name_eq(token, sep))
-                        .unwrap_or(false)
-                    {
-                        item.idx += 1;
-                        next_items.push(item);
-                    }
-                }
-                // We don't need a separator. Move the "dot" back to the beginning of the matcher
-                // and try to match again UNLESS we are only allowed to have _one_ repetition.
-                else if item.seq_op != Some(quoted::KleeneOp::ZeroOrOne) {
-                    item.match_cur = item.match_lo;
-                    item.idx = 0;
-                    cur_items.push(item);
-                }
-            }
-            // If we are not in a repetition, then being at the end of a matcher means that we have
-            // reached the potential end of the input.
-            else {
-                eof_items.push(item);
-            }
-        }
-        // We are in the middle of a matcher.
-        else {
-            // Look at what token in the matcher we are trying to match the current token (`token`)
-            // against. Depending on that, we may generate new items.
-            match item.top_elts.get_tt(idx) {
-                // Need to descend into a sequence
-                TokenTree::Sequence(sp, seq) => {
-                    // Examine the case where there are 0 matches of this sequence.
We are - // implicitly disallowing OneOrMore from having 0 matches here. Thus, that will - // result in a "no rules expected token" error by virtue of this matcher not - // working. - if seq.kleene.op == quoted::KleeneOp::ZeroOrMore - || seq.kleene.op == quoted::KleeneOp::ZeroOrOne - { - let mut new_item = item.clone(); - new_item.match_cur += seq.num_captures; - new_item.idx += 1; - for idx in item.match_cur..item.match_cur + seq.num_captures { - new_item.push_match(idx, MatchedSeq(Lrc::new(smallvec![]), sp)); - } - cur_items.push(new_item); - } - - let matches = create_matches(item.matches.len()); - cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos { - stack: smallvec![], - sep: seq.separator.clone(), - seq_op: Some(seq.kleene.op), - idx: 0, - matches, - match_lo: item.match_cur, - match_cur: item.match_cur, - match_hi: item.match_cur + seq.num_captures, - up: Some(item), - sp_open: sp.open, - top_elts: Tt(TokenTree::Sequence(sp, seq)), - }))); - } - - // We need to match a metavar (but the identifier is invalid)... this is an error - TokenTree::MetaVarDecl(span, _, id) if id.name == kw::Invalid => { - if sess.missing_fragment_specifiers.borrow_mut().remove(&span) { - return Error(span, "missing fragment specifier".to_string()); - } - } - - // We need to match a metavar with a valid ident... call out to the black-box - // parser by adding an item to `bb_items`. - TokenTree::MetaVarDecl(_, _, id) => { - // Built-in nonterminals never start with these tokens, - // so we can eliminate them from consideration. - if may_begin_with(token, id.name) { - bb_items.push(item); - } - } - - // We need to descend into a delimited submatcher or a doc comment. To do this, we - // push the current matcher onto a stack and push a new item containing the - // submatcher onto `cur_items`. - // - // At the beginning of the loop, if we reach the end of the delimited submatcher, - // we pop the stack to backtrack out of the descent. - seq @ TokenTree::Delimited(..) | - seq @ TokenTree::Token(Token { kind: DocComment(..), .. }) => { - let lower_elts = mem::replace(&mut item.top_elts, Tt(seq)); - let idx = item.idx; - item.stack.push(MatcherTtFrame { - elts: lower_elts, - idx, - }); - item.idx = 0; - cur_items.push(item); - } - - // We just matched a normal token. We can just advance the parser. - TokenTree::Token(t) if token_name_eq(&t, token) => { - item.idx += 1; - next_items.push(item); - } - - // There was another token that was not `token`... This means we can't add any - // rules. NOTE that this is not necessarily an error unless _all_ items in - // `cur_items` end up doing this. There may still be some other matchers that do - // end up working out. - TokenTree::Token(..) | TokenTree::MetaVar(..) => {} - } - } - } - - // Yay a successful parse (so far)! - Success(()) -} - -/// Use the given sequence of token trees (`ms`) as a matcher. Match the given token stream `tts` -/// against it and return the match. 
-///
-/// # Parameters
-///
-/// - `sess`: The session into which errors are emitted
-/// - `tts`: The tokenstream we are matching against the pattern `ms`
-/// - `ms`: A sequence of token trees representing a pattern against which we are matching
-/// - `directory`: Information about the file locations (needed for the black-box parser)
-/// - `recurse_into_modules`: Whether or not to recurse into modules (needed for the black-box
-///   parser)
-crate fn parse(
-    sess: &ParseSess,
-    tts: TokenStream,
-    ms: &[TokenTree],
-    directory: Option<Directory<'_>>,
-    recurse_into_modules: bool,
-) -> NamedParseResult {
-    // Create a parser that can be used for the "black box" parts.
-    let mut parser = Parser::new(
-        sess,
-        tts,
-        directory,
-        recurse_into_modules,
-        true,
-        crate::MACRO_ARGUMENTS,
-    );
-
-    // A queue of possible matcher positions. We initialize it with the matcher position in which
-    // the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then
-    // processes all of these possible matcher positions and produces possible next positions into
-    // `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items`
-    // and we start over again.
-    //
-    // This MatcherPos instance is allocated on the stack. All others -- and
-    // there are frequently *no* others! -- are allocated on the heap.
-    let mut initial = initial_matcher_pos(ms, parser.token.span);
-    let mut cur_items = smallvec![MatcherPosHandle::Ref(&mut initial)];
-    let mut next_items = Vec::new();
-
-    loop {
-        // Matcher positions black-box parsed by parser.rs (`parser`)
-        let mut bb_items = SmallVec::new();
-
-        // Matcher positions that would be valid if the macro invocation was over now
-        let mut eof_items = SmallVec::new();
-        assert!(next_items.is_empty());
-
-        // Process `cur_items` until either we have finished the input or we need to get some
-        // parsing from the black-box parser done. The result is that `next_items` will contain a
-        // bunch of possible next matcher positions.
-        match inner_parse_loop(
-            sess,
-            &mut cur_items,
-            &mut next_items,
-            &mut eof_items,
-            &mut bb_items,
-            &parser.token,
-        ) {
-            Success(_) => {}
-            Failure(token, msg) => return Failure(token, msg),
-            Error(sp, msg) => return Error(sp, msg),
-        }
-
-        // inner parse loop handled all cur_items, so it's empty
-        assert!(cur_items.is_empty());
-
-        // We need to do some post processing after the `inner_parser_loop`.
-        //
-        // Error messages here could be improved with links to original rules.
-
-        // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise,
-        // either the parse is ambiguous (which should never happen) or there is a syntax error.
-        if parser.token == token::Eof {
-            if eof_items.len() == 1 {
-                let matches = eof_items[0]
-                    .matches
-                    .iter_mut()
-                    .map(|dv| Lrc::make_mut(dv).pop().unwrap());
-                return nameize(sess, ms, matches);
-            } else if eof_items.len() > 1 {
-                return Error(
-                    parser.token.span,
-                    "ambiguity: multiple successful parses".to_string(),
-                );
-            } else {
-                return Failure(
-                    Token::new(token::Eof, if parser.token.span.is_dummy() {
-                        parser.token.span
-                    } else {
-                        sess.source_map().next_point(parser.token.span)
-                    }),
-                    "missing tokens in macro arguments",
-                );
-            }
-        }
-        // Performance hack: eof_items may share matchers via Rc with other things that we want
-        // to modify. Dropping eof_items now may drop these refcounts to 1, preventing an
-        // unnecessary implicit clone later in Rc::make_mut.
-        drop(eof_items);
-
-        // Another possibility is that we need to call out to the (black-box) rust parser to parse
-        // some nonterminal. However, if there is not EXACTLY ONE of these, something is wrong.
-        if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 {
-            let nts = bb_items
-                .iter()
-                .map(|item| match item.top_elts.get_tt(item.idx) {
-                    TokenTree::MetaVarDecl(_, bind, name) => format!("{} ('{}')", name, bind),
-                    _ => panic!(),
-                })
-                .collect::<Vec<String>>()
-                .join(" or ");
-
-            return Error(
-                parser.token.span,
-                format!(
-                    "local ambiguity: multiple parsing options: {}",
-                    match next_items.len() {
-                        0 => format!("built-in NTs {}.", nts),
-                        1 => format!("built-in NTs {} or 1 other option.", nts),
-                        n => format!("built-in NTs {} or {} other options.", nts, n),
-                    }
-                ),
-            );
-        }
-        // If there are no possible next positions AND we aren't waiting for the black-box parser,
-        // then there is a syntax error.
-        else if bb_items.is_empty() && next_items.is_empty() {
-            return Failure(
-                parser.token.take(),
-                "no rules expected this token in macro call",
-            );
-        }
-        // Dump all possible `next_items` into `cur_items` for the next iteration.
-        else if !next_items.is_empty() {
-            // Now process the next token
-            cur_items.extend(next_items.drain(..));
-            parser.bump();
-        }
-        // Finally, we have the case where we need to call the black-box parser to get some
-        // nonterminal.
-        else {
-            assert_eq!(bb_items.len(), 1);
-
-            let mut item = bb_items.pop().unwrap();
-            if let TokenTree::MetaVarDecl(span, _, ident) = item.top_elts.get_tt(item.idx) {
-                let match_cur = item.match_cur;
-                item.push_match(
-                    match_cur,
-                    MatchedNonterminal(Lrc::new(parse_nt(&mut parser, span, ident.name))),
-                );
-                item.idx += 1;
-                item.match_cur += 1;
-            } else {
-                unreachable!()
-            }
-            cur_items.push(item);
-        }
-
-        assert!(!cur_items.is_empty());
-    }
-}
-
-/// The token is an identifier, but not `_`.
-/// We prohibit passing `_` to macros expecting `ident` for now.
-fn get_macro_name(token: &Token) -> Option<(Name, bool)> {
-    match token.kind {
-        token::Ident(name, is_raw) if name != kw::Underscore => Some((name, is_raw)),
-        _ => None,
-    }
-}
-
-/// Checks whether a non-terminal may begin with a particular token.
-///
-/// Returning `false` is a *stability guarantee* that such a matcher will *never* begin with that
-/// token. Be conservative (return true) if not sure.
-fn may_begin_with(token: &Token, name: Name) -> bool {
-    /// Checks whether the non-terminal may contain a single (non-keyword) identifier.
-    fn may_be_ident(nt: &token::Nonterminal) -> bool {
-        match *nt {
-            token::NtItem(_) | token::NtBlock(_) | token::NtVis(_) => false,
-            _ => true,
-        }
-    }
-
-    match name {
-        sym::expr => token.can_begin_expr()
-            // This exception is here for backwards compatibility.
-            && !token.is_keyword(kw::Let),
-        sym::ty => token.can_begin_type(),
-        sym::ident => get_macro_name(token).is_some(),
-        sym::literal => token.can_begin_literal_or_bool(),
-        sym::vis => match token.kind {
-            // The follow-set of :vis + "priv" keyword + interpolated
-            token::Comma | token::Ident(..) | token::Interpolated(_) => true,
-            _ => token.can_begin_type(),
-        },
-        sym::block => match token.kind {
-            token::OpenDelim(token::Brace) => true,
-            token::Interpolated(ref nt) => match **nt {
-                token::NtItem(_)
-                | token::NtPat(_)
-                | token::NtTy(_)
-                | token::NtIdent(..)
-                | token::NtMeta(_)
-                | token::NtPath(_)
-                | token::NtVis(_) => false, // none of these may start with '{'.
- _ => true, - }, - _ => false, - }, - sym::path | sym::meta => match token.kind { - token::ModSep | token::Ident(..) => true, - token::Interpolated(ref nt) => match **nt { - token::NtPath(_) | token::NtMeta(_) => true, - _ => may_be_ident(&nt), - }, - _ => false, - }, - sym::pat => match token.kind { - token::Ident(..) | // box, ref, mut, and other identifiers (can stricten) - token::OpenDelim(token::Paren) | // tuple pattern - token::OpenDelim(token::Bracket) | // slice pattern - token::BinOp(token::And) | // reference - token::BinOp(token::Minus) | // negative literal - token::AndAnd | // double reference - token::Literal(..) | // literal - token::DotDot | // range pattern (future compat) - token::DotDotDot | // range pattern (future compat) - token::ModSep | // path - token::Lt | // path (UFCS constant) - token::BinOp(token::Shl) => true, // path (double UFCS) - token::Interpolated(ref nt) => may_be_ident(nt), - _ => false, - }, - sym::lifetime => match token.kind { - token::Lifetime(_) => true, - token::Interpolated(ref nt) => match **nt { - token::NtLifetime(_) | token::NtTT(_) => true, - _ => false, - }, - _ => false, - }, - _ => match token.kind { - token::CloseDelim(_) => false, - _ => true, - }, - } -} - -/// A call to the "black-box" parser to parse some Rust non-terminal. -/// -/// # Parameters -/// -/// - `p`: the "black-box" parser to use -/// - `sp`: the `Span` we want to parse -/// - `name`: the name of the metavar _matcher_ we want to match (e.g., `tt`, `ident`, `block`, -/// etc...) -/// -/// # Returns -/// -/// The parsed non-terminal. -fn parse_nt(p: &mut Parser<'_>, sp: Span, name: Symbol) -> Nonterminal { - if name == sym::tt { - return token::NtTT(p.parse_token_tree()); - } - // check at the beginning and the parser checks after each bump - p.process_potential_macro_variable(); - match name { - sym::item => match panictry!(p.parse_item()) { - Some(i) => token::NtItem(i), - None => { - p.fatal("expected an item keyword").emit(); - FatalError.raise(); - } - }, - sym::block => token::NtBlock(panictry!(p.parse_block())), - sym::stmt => match panictry!(p.parse_stmt()) { - Some(s) => token::NtStmt(s), - None => { - p.fatal("expected a statement").emit(); - FatalError.raise(); - } - }, - sym::pat => token::NtPat(panictry!(p.parse_pat(None))), - sym::expr => token::NtExpr(panictry!(p.parse_expr())), - sym::literal => token::NtLiteral(panictry!(p.parse_literal_maybe_minus())), - sym::ty => token::NtTy(panictry!(p.parse_ty())), - // this could be handled like a token, since it is one - sym::ident => if let Some((name, is_raw)) = get_macro_name(&p.token) { - let span = p.token.span; - p.bump(); - token::NtIdent(Ident::new(name, span), is_raw) - } else { - let token_str = pprust::token_to_string(&p.token); - p.fatal(&format!("expected ident, found {}", &token_str)).emit(); - FatalError.raise() - } - sym::path => token::NtPath(panictry!(p.parse_path(PathStyle::Type))), - sym::meta => token::NtMeta(panictry!(p.parse_meta_item())), - sym::vis => token::NtVis(panictry!(p.parse_visibility(true))), - sym::lifetime => if p.check_lifetime() { - token::NtLifetime(p.expect_lifetime().ident) - } else { - let token_str = pprust::token_to_string(&p.token); - p.fatal(&format!("expected a lifetime, found `{}`", &token_str)).emit(); - FatalError.raise(); - } - // this is not supposed to happen, since it has been checked - // when compiling the macro. 
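As a user-level illustration of `get_macro_name` and the `ident` arm of `parse_nt` above: an `ident` matcher hands the token to the black-box parser's identifier path, and `_` is refused. The macro below is a made-up example, not compiler code.

```rust
macro_rules! takes_ident {
    ($i:ident) => {
        stringify!($i)
    };
}

fn main() {
    // `x` is matched as an `ident` by the black-box parser.
    assert_eq!(takes_ident!(x), "x");

    // Per the check above, `_` is not accepted where `ident` is expected:
    // takes_ident!(_); // rejected at the call site
}
```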
- _ => p.span_bug(sp, "invalid fragment specifier"), - } -} diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs deleted file mode 100644 index 90dfa6e7ac8..00000000000 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ /dev/null @@ -1,1173 +0,0 @@ -use crate::ast; -use crate::attr::{self, TransparencyError}; -use crate::edition::Edition; -use crate::ext::base::{DummyResult, ExtCtxt, MacResult, TTMacroExpander}; -use crate::ext::base::{SyntaxExtension, SyntaxExtensionKind}; -use crate::ext::expand::{AstFragment, AstFragmentKind}; -use crate::ext::tt::macro_check; -use crate::ext::tt::macro_parser::{parse, parse_failure_msg}; -use crate::ext::tt::macro_parser::{Error, Failure, Success}; -use crate::ext::tt::macro_parser::{MatchedNonterminal, MatchedSeq}; -use crate::ext::tt::quoted; -use crate::ext::tt::transcribe::transcribe; -use crate::feature_gate::Features; -use crate::parse::parser::Parser; -use crate::parse::token::TokenKind::*; -use crate::parse::token::{self, NtTT, Token}; -use crate::parse::{Directory, ParseSess}; -use crate::symbol::{kw, sym, Symbol}; -use crate::tokenstream::{DelimSpan, TokenStream, TokenTree}; - -use errors::{DiagnosticBuilder, FatalError}; -use log::debug; -use syntax_pos::hygiene::Transparency; -use syntax_pos::Span; - -use rustc_data_structures::fx::FxHashMap; -use std::borrow::Cow; -use std::collections::hash_map::Entry; -use std::slice; - -use errors::Applicability; -use rustc_data_structures::sync::Lrc; - -const VALID_FRAGMENT_NAMES_MSG: &str = "valid fragment specifiers are \ - `ident`, `block`, `stmt`, `expr`, `pat`, `ty`, `lifetime`, \ - `literal`, `path`, `meta`, `tt`, `item` and `vis`"; - -crate struct ParserAnyMacro<'a> { - parser: Parser<'a>, - - /// Span of the expansion site of the macro this parser is for - site_span: Span, - /// The ident of the macro we're parsing - macro_ident: ast::Ident, - arm_span: Span, -} - -crate fn annotate_err_with_kind( - err: &mut DiagnosticBuilder<'_>, - kind: AstFragmentKind, - span: Span, -) { - match kind { - AstFragmentKind::Ty => { - err.span_label(span, "this macro call doesn't expand to a type"); - } - AstFragmentKind::Pat => { - err.span_label(span, "this macro call doesn't expand to a pattern"); - } - _ => {} - }; -} - -impl<'a> ParserAnyMacro<'a> { - crate fn make(mut self: Box>, kind: AstFragmentKind) -> AstFragment { - let ParserAnyMacro { site_span, macro_ident, ref mut parser, arm_span } = *self; - let fragment = panictry!(parser.parse_ast_fragment(kind, true).map_err(|mut e| { - if parser.token == token::Eof && e.message().ends_with(", found ``") { - if !e.span.is_dummy() { - // early end of macro arm (#52866) - e.replace_span_with(parser.sess.source_map().next_point(parser.token.span)); - } - let msg = &e.message[0]; - e.message[0] = ( - format!( - "macro expansion ends with an incomplete expression: {}", - msg.0.replace(", found ``", ""), - ), - msg.1, - ); - } - if e.span.is_dummy() { - // Get around lack of span in error (#30128) - e.replace_span_with(site_span); - if parser.sess.source_map().span_to_filename(arm_span).is_real() { - e.span_label(arm_span, "in this macro arm"); - } - } else if !parser.sess.source_map().span_to_filename(parser.token.span).is_real() { - e.span_label(site_span, "in this macro invocation"); - } - match kind { - AstFragmentKind::Pat if macro_ident.name == sym::vec => { - let mut suggestion = None; - if let Ok(code) = parser.sess.source_map().span_to_snippet(site_span) { - if let Some(bang) = code.find('!') { - suggestion = Some(code[bang + 
1..].to_string());
-                        }
-                    }
-                    if let Some(suggestion) = suggestion {
-                        e.span_suggestion(
-                            site_span,
-                            "use a slice pattern here instead",
-                            suggestion,
-                            Applicability::MachineApplicable,
-                        );
-                    } else {
-                        e.span_label(
-                            site_span,
-                            "use a slice pattern here instead",
-                        );
-                    }
-                    e.help("for more information, see https://doc.rust-lang.org/edition-guide/\
-                            rust-2018/slice-patterns.html");
-                }
-                _ => annotate_err_with_kind(&mut e, kind, site_span),
-            };
-            e
-        }));
-
-        // We allow semicolons at the end of expressions -- e.g., the semicolon in
-        // `macro_rules! m { () => { panic!(); } }` isn't parsed by `.parse_expr()`,
-        // but `m!()` is allowed in expression positions (cf. issue #34706).
-        if kind == AstFragmentKind::Expr && parser.token == token::Semi {
-            parser.bump();
-        }
-
-        // Make sure we don't have any tokens left to parse so we don't silently drop anything.
-        let path = ast::Path::from_ident(macro_ident.with_span_pos(site_span));
-        parser.ensure_complete_parse(&path, kind.name(), site_span);
-        fragment
-    }
-}
-
-struct MacroRulesMacroExpander {
-    name: ast::Ident,
-    span: Span,
-    transparency: Transparency,
-    lhses: Vec<quoted::TokenTree>,
-    rhses: Vec<quoted::TokenTree>,
-    valid: bool,
-}
-
-impl TTMacroExpander for MacroRulesMacroExpander {
-    fn expand<'cx>(
-        &self,
-        cx: &'cx mut ExtCtxt<'_>,
-        sp: Span,
-        input: TokenStream,
-    ) -> Box<dyn MacResult + 'cx> {
-        if !self.valid {
-            return DummyResult::any(sp);
-        }
-        generic_extension(
-            cx, sp, self.span, self.name, self.transparency, input, &self.lhses, &self.rhses
-        )
-    }
-}
-
-fn trace_macros_note(cx: &mut ExtCtxt<'_>, sp: Span, message: String) {
-    let sp = sp.macro_backtrace().last().map(|trace| trace.call_site).unwrap_or(sp);
-    cx.expansions.entry(sp).or_default().push(message);
-}
-
-/// Given `lhses` and `rhses`, this is the new macro we create
-fn generic_extension<'cx>(
-    cx: &'cx mut ExtCtxt<'_>,
-    sp: Span,
-    def_span: Span,
-    name: ast::Ident,
-    transparency: Transparency,
-    arg: TokenStream,
-    lhses: &[quoted::TokenTree],
-    rhses: &[quoted::TokenTree],
-) -> Box<dyn MacResult + 'cx> {
-    if cx.trace_macros() {
-        trace_macros_note(cx, sp, format!("expanding `{}! {{ {} }}`", name, arg));
-    }
-
-    // Which arm's failure should we report? (the one furthest along)
-    let mut best_failure: Option<(Token, &str)> = None;
-
-    for (i, lhs) in lhses.iter().enumerate() {
-        // try each arm's matchers
-        let lhs_tt = match *lhs {
-            quoted::TokenTree::Delimited(_, ref delim) => &delim.tts[..],
-            _ => cx.span_bug(sp, "malformed macro lhs"),
-        };
-
-        match TokenTree::parse(cx, lhs_tt, arg.clone()) {
-            Success(named_matches) => {
-                let rhs = match rhses[i] {
-                    // ignore delimiters
-                    quoted::TokenTree::Delimited(_, ref delimed) => delimed.tts.clone(),
-                    _ => cx.span_bug(sp, "malformed macro rhs"),
-                };
-                let arm_span = rhses[i].span();
-
-                let rhs_spans = rhs.iter().map(|t| t.span()).collect::<Vec<_>>();
-                // rhs has holes (`$id` and `$(...)` that need to be filled)
-                let mut tts = transcribe(cx, &named_matches, rhs, transparency);
-
-                // Replace all the tokens for the corresponding positions in the macro, to maintain
-                // proper positions in error reporting, while maintaining the macro_backtrace.
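The `trace_macros_note` calls above back the unstable `trace_macros!` facility. A hypothetical nightly-only session might look like the following; the noted messages follow the `expanding ...` and `to ...` format strings above (the macro and values here are illustrative, not from the compiler):

```rust
#![feature(trace_macros)] // nightly-only

macro_rules! sq {
    ($e:expr) => {
        $e * $e
    };
}

fn main() {
    trace_macros!(true);
    let n = sq!(3); // compiler notes: expanding `sq! { 3 }`, then: to `3 * 3`
    trace_macros!(false);
    assert_eq!(n, 9);
}
```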
- if rhs_spans.len() == tts.len() { - tts = tts.map_enumerated(|i, mut tt| { - let mut sp = rhs_spans[i]; - sp = sp.with_ctxt(tt.span().ctxt()); - tt.set_span(sp); - tt - }); - } - - if cx.trace_macros() { - trace_macros_note(cx, sp, format!("to `{}`", tts)); - } - - let directory = Directory { - path: Cow::from(cx.current_expansion.module.directory.as_path()), - ownership: cx.current_expansion.directory_ownership, - }; - let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), true, false, None); - p.root_module_name = - cx.current_expansion.module.mod_path.last().map(|id| id.as_str().to_string()); - p.last_type_ascription = cx.current_expansion.prior_type_ascription; - - p.process_potential_macro_variable(); - // Let the context choose how to interpret the result. - // Weird, but useful for X-macros. - return Box::new(ParserAnyMacro { - parser: p, - - // Pass along the original expansion site and the name of the macro - // so we can print a useful error message if the parse of the expanded - // macro leaves unparsed tokens. - site_span: sp, - macro_ident: name, - arm_span, - }); - } - Failure(token, msg) => match best_failure { - Some((ref best_token, _)) if best_token.span.lo() >= token.span.lo() => {} - _ => best_failure = Some((token, msg)), - }, - Error(err_sp, ref msg) => cx.span_fatal(err_sp.substitute_dummy(sp), &msg[..]), - } - } - - let (token, label) = best_failure.expect("ran no matchers"); - let span = token.span.substitute_dummy(sp); - let mut err = cx.struct_span_err(span, &parse_failure_msg(&token)); - err.span_label(span, label); - if !def_span.is_dummy() && cx.source_map().span_to_filename(def_span).is_real() { - err.span_label(cx.source_map().def_span(def_span), "when calling this macro"); - } - - // Check whether there's a missing comma in this macro call, like `println!("{}" a);` - if let Some((arg, comma_span)) = arg.add_comma() { - for lhs in lhses { - // try each arm's matchers - let lhs_tt = match *lhs { - quoted::TokenTree::Delimited(_, ref delim) => &delim.tts[..], - _ => continue, - }; - match TokenTree::parse(cx, lhs_tt, arg.clone()) { - Success(_) => { - if comma_span.is_dummy() { - err.note("you might be missing a comma"); - } else { - err.span_suggestion_short( - comma_span, - "missing comma here", - ", ".to_string(), - Applicability::MachineApplicable, - ); - } - } - _ => {} - } - } - } - err.emit(); - cx.trace_macros_diag(); - DummyResult::any(sp) -} - -// Note that macro-by-example's input is also matched against a token tree: -// $( $lhs:tt => $rhs:tt );+ -// -// Holy self-referential! - -/// Converts a macro item into a syntax extension. -pub fn compile_declarative_macro( - sess: &ParseSess, - features: &Features, - def: &ast::Item, - edition: Edition, -) -> SyntaxExtension { - let diag = &sess.span_diagnostic; - let lhs_nm = ast::Ident::new(sym::lhs, def.span); - let rhs_nm = ast::Ident::new(sym::rhs, def.span); - let tt_spec = ast::Ident::new(sym::tt, def.span); - - // Parse the macro_rules! invocation - let body = match def.node { - ast::ItemKind::MacroDef(ref body) => body, - _ => unreachable!(), - }; - - // The pattern that macro_rules matches. - // The grammar for macro_rules! is: - // $( $lhs:tt => $rhs:tt );+ - // ...quasiquoting this would be nice. 
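For instance, under the grammar above a two-rule definition is itself matched as two `$lhs:tt => $rhs:tt` pairs separated by `;` (an illustrative macro, not compiler code):

```rust
// Matched against `$( $lhs:tt => $rhs:tt );+`, this yields two pairs:
//   lhs = `()`          rhs = `{ 0 }`
//   lhs = `($x:expr)`   rhs = `{ $x }`
macro_rules! two_arms {
    () => { 0 };
    ($x:expr) => { $x };
}

fn main() {
    assert_eq!(two_arms!(), 0);
    assert_eq!(two_arms!(7), 7);
}
```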
- // These spans won't matter, anyways - let argument_gram = vec![ - quoted::TokenTree::Sequence( - DelimSpan::dummy(), - Lrc::new(quoted::SequenceRepetition { - tts: vec![ - quoted::TokenTree::MetaVarDecl(def.span, lhs_nm, tt_spec), - quoted::TokenTree::token(token::FatArrow, def.span), - quoted::TokenTree::MetaVarDecl(def.span, rhs_nm, tt_spec), - ], - separator: Some(Token::new( - if body.legacy { token::Semi } else { token::Comma }, - def.span, - )), - kleene: quoted::KleeneToken::new(quoted::KleeneOp::OneOrMore, def.span), - num_captures: 2, - }), - ), - // to phase into semicolon-termination instead of semicolon-separation - quoted::TokenTree::Sequence( - DelimSpan::dummy(), - Lrc::new(quoted::SequenceRepetition { - tts: vec![quoted::TokenTree::token( - if body.legacy { token::Semi } else { token::Comma }, - def.span, - )], - separator: None, - kleene: quoted::KleeneToken::new(quoted::KleeneOp::ZeroOrMore, def.span), - num_captures: 0, - }), - ), - ]; - - let argument_map = match parse(sess, body.stream(), &argument_gram, None, true) { - Success(m) => m, - Failure(token, msg) => { - let s = parse_failure_msg(&token); - let sp = token.span.substitute_dummy(def.span); - let mut err = sess.span_diagnostic.struct_span_fatal(sp, &s); - err.span_label(sp, msg); - err.emit(); - FatalError.raise(); - } - Error(sp, s) => { - sess.span_diagnostic.span_fatal(sp.substitute_dummy(def.span), &s).raise(); - } - }; - - let mut valid = true; - - // Extract the arguments: - let lhses = match argument_map[&lhs_nm] { - MatchedSeq(ref s, _) => s - .iter() - .map(|m| { - if let MatchedNonterminal(ref nt) = *m { - if let NtTT(ref tt) = **nt { - let tt = quoted::parse( - tt.clone().into(), - true, - sess, - features, - &def.attrs, - edition, - def.id, - ) - .pop() - .unwrap(); - valid &= check_lhs_nt_follows(sess, features, &def.attrs, &tt); - return tt; - } - } - sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs") - }) - .collect::>(), - _ => sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs"), - }; - - let rhses = match argument_map[&rhs_nm] { - MatchedSeq(ref s, _) => s - .iter() - .map(|m| { - if let MatchedNonterminal(ref nt) = *m { - if let NtTT(ref tt) = **nt { - return quoted::parse( - tt.clone().into(), - false, - sess, - features, - &def.attrs, - edition, - def.id, - ) - .pop() - .unwrap(); - } - } - sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs") - }) - .collect::>(), - _ => sess.span_diagnostic.span_bug(def.span, "wrong-structured rhs"), - }; - - for rhs in &rhses { - valid &= check_rhs(sess, rhs); - } - - // don't abort iteration early, so that errors for multiple lhses can be reported - for lhs in &lhses { - valid &= check_lhs_no_empty_seq(sess, slice::from_ref(lhs)); - } - - // We use CRATE_NODE_ID instead of `def.id` otherwise we may emit buffered lints for a node id - // that is not lint-checked and trigger the "failed to process buffered lint here" bug. 
-    valid &= macro_check::check_meta_variables(sess, ast::CRATE_NODE_ID, def.span, &lhses, &rhses);
-
-    let (transparency, transparency_error) = attr::find_transparency(&def.attrs, body.legacy);
-    match transparency_error {
-        Some(TransparencyError::UnknownTransparency(value, span)) =>
-            diag.span_err(span, &format!("unknown macro transparency: `{}`", value)),
-        Some(TransparencyError::MultipleTransparencyAttrs(old_span, new_span)) =>
-            diag.span_err(vec![old_span, new_span], "multiple macro transparency attributes"),
-        None => {}
-    }
-
-    let expander: Box<_> = Box::new(MacroRulesMacroExpander {
-        name: def.ident, span: def.span, transparency, lhses, rhses, valid
-    });
-
-    SyntaxExtension::new(
-        sess,
-        SyntaxExtensionKind::LegacyBang(expander),
-        def.span,
-        Vec::new(),
-        edition,
-        def.ident.name,
-        &def.attrs,
-    )
-}
-
-fn check_lhs_nt_follows(
-    sess: &ParseSess,
-    features: &Features,
-    attrs: &[ast::Attribute],
-    lhs: &quoted::TokenTree,
-) -> bool {
-    // lhs is going to be like TokenTree::Delimited(...), where the
-    // entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens.
-    if let quoted::TokenTree::Delimited(_, ref tts) = *lhs {
-        check_matcher(sess, features, attrs, &tts.tts)
-    } else {
-        let msg = "invalid macro matcher; matchers must be contained in balanced delimiters";
-        sess.span_diagnostic.span_err(lhs.span(), msg);
-        false
-    }
-    // we don't abort on errors on rejection, the driver will do that for us
-    // after parsing/expansion. we can report every error in every macro this way.
-}
-
-/// Checks that the lhs contains no repetition which could match an empty token
-/// tree, because then the matcher would hang indefinitely.
-fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[quoted::TokenTree]) -> bool {
-    use quoted::TokenTree;
-    for tt in tts {
-        match *tt {
-            TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => (),
-            TokenTree::Delimited(_, ref del) => {
-                if !check_lhs_no_empty_seq(sess, &del.tts) {
-                    return false;
-                }
-            }
-            TokenTree::Sequence(span, ref seq) => {
-                if seq.separator.is_none()
-                    && seq.tts.iter().all(|seq_tt| match *seq_tt {
-                        TokenTree::MetaVarDecl(_, _, id) => id.name == sym::vis,
-                        TokenTree::Sequence(_, ref sub_seq) => {
-                            sub_seq.kleene.op == quoted::KleeneOp::ZeroOrMore
-                                || sub_seq.kleene.op == quoted::KleeneOp::ZeroOrOne
-                        }
-                        _ => false,
-                    })
-                {
-                    let sp = span.entire();
-                    sess.span_diagnostic.span_err(sp, "repetition matches empty token tree");
-                    return false;
-                }
-                if !check_lhs_no_empty_seq(sess, &seq.tts) {
-                    return false;
-                }
-            }
-        }
-    }
-
-    true
-}
-
-fn check_rhs(sess: &ParseSess, rhs: &quoted::TokenTree) -> bool {
-    match *rhs {
-        quoted::TokenTree::Delimited(..) => return true,
-        _ => sess.span_diagnostic.span_err(rhs.span(), "macro rhs must be delimited"),
-    }
-    false
-}
-
-fn check_matcher(
-    sess: &ParseSess,
-    features: &Features,
-    attrs: &[ast::Attribute],
-    matcher: &[quoted::TokenTree],
-) -> bool {
-    let first_sets = FirstSets::new(matcher);
-    let empty_suffix = TokenSet::empty();
-    let err = sess.span_diagnostic.err_count();
-    check_matcher_core(sess, features, attrs, &first_sets, matcher, &empty_suffix);
-    err == sess.span_diagnostic.err_count()
-}
-
-// The `FirstSets` for a matcher is a mapping from subsequences in the
-// matcher to the FIRST set for that subsequence.
-//
-// This mapping is partially precomputed via a backwards scan over the
-// token trees of the matcher, which provides a mapping from each
-// repetition sequence to its *first* set.
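A user-level illustration of `check_lhs_no_empty_seq`: a separator-less repetition whose body can match zero tokens (here, `vis`) is rejected, while any concrete token inside the repetition guarantees progress. The macros are illustrative assumptions about how the check surfaces, not compiler code.

```rust
// Rejected at definition time with "repetition matches empty token tree",
// because `vis` can match zero tokens and the repetition has no separator:
//
//     macro_rules! bad {
//         ($($v:vis)*) => {};
//     }

// Accepted: the `fn` token inside the repetition forces progress each turn.
macro_rules! fns {
    ($($v:vis fn $name:ident)*) => {
        $($v fn $name() {})*
    };
}

fns!(pub fn a fn b); // `$v` matches `pub` the first time, nothing the second

fn main() {
    a();
    b();
}
```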
-// -// (Hypothetically, sequences should be uniquely identifiable via their -// spans, though perhaps that is false, e.g., for macro-generated macros -// that do not try to inject artificial span information. My plan is -// to try to catch such cases ahead of time and not include them in -// the precomputed mapping.) -struct FirstSets { - // this maps each TokenTree::Sequence `$(tt ...) SEP OP` that is uniquely identified by its - // span in the original matcher to the First set for the inner sequence `tt ...`. - // - // If two sequences have the same span in a matcher, then map that - // span to None (invalidating the mapping here and forcing the code to - // use a slow path). - first: FxHashMap>, -} - -impl FirstSets { - fn new(tts: &[quoted::TokenTree]) -> FirstSets { - use quoted::TokenTree; - - let mut sets = FirstSets { first: FxHashMap::default() }; - build_recur(&mut sets, tts); - return sets; - - // walks backward over `tts`, returning the FIRST for `tts` - // and updating `sets` at the same time for all sequence - // substructure we find within `tts`. - fn build_recur(sets: &mut FirstSets, tts: &[TokenTree]) -> TokenSet { - let mut first = TokenSet::empty(); - for tt in tts.iter().rev() { - match *tt { - TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => { - first.replace_with(tt.clone()); - } - TokenTree::Delimited(span, ref delimited) => { - build_recur(sets, &delimited.tts[..]); - first.replace_with(delimited.open_tt(span.open)); - } - TokenTree::Sequence(sp, ref seq_rep) => { - let subfirst = build_recur(sets, &seq_rep.tts[..]); - - match sets.first.entry(sp.entire()) { - Entry::Vacant(vac) => { - vac.insert(Some(subfirst.clone())); - } - Entry::Occupied(mut occ) => { - // if there is already an entry, then a span must have collided. - // This should not happen with typical macro_rules macros, - // but syntax extensions need not maintain distinct spans, - // so distinct syntax trees can be assigned the same span. - // In such a case, the map cannot be trusted; so mark this - // entry as unusable. - occ.insert(None); - } - } - - // If the sequence contents can be empty, then the first - // token could be the separator token itself. - - if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) { - first.add_one_maybe(TokenTree::Token(sep.clone())); - } - - // Reverse scan: Sequence comes before `first`. - if subfirst.maybe_empty - || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrMore - || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrOne - { - // If sequence is potentially empty, then - // union them (preserving first emptiness). - first.add_all(&TokenSet { maybe_empty: true, ..subfirst }); - } else { - // Otherwise, sequence guaranteed - // non-empty; replace first. - first = subfirst; - } - } - } - } - - first - } - } - - // walks forward over `tts` until all potential FIRST tokens are - // identified. - fn first(&self, tts: &[quoted::TokenTree]) -> TokenSet { - use quoted::TokenTree; - - let mut first = TokenSet::empty(); - for tt in tts.iter() { - assert!(first.maybe_empty); - match *tt { - TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) 
=> { - first.add_one(tt.clone()); - return first; - } - TokenTree::Delimited(span, ref delimited) => { - first.add_one(delimited.open_tt(span.open)); - return first; - } - TokenTree::Sequence(sp, ref seq_rep) => { - let subfirst_owned; - let subfirst = match self.first.get(&sp.entire()) { - Some(&Some(ref subfirst)) => subfirst, - Some(&None) => { - subfirst_owned = self.first(&seq_rep.tts[..]); - &subfirst_owned - } - None => { - panic!("We missed a sequence during FirstSets construction"); - } - }; - - // If the sequence contents can be empty, then the first - // token could be the separator token itself. - if let (Some(sep), true) = (&seq_rep.separator, subfirst.maybe_empty) { - first.add_one_maybe(TokenTree::Token(sep.clone())); - } - - assert!(first.maybe_empty); - first.add_all(subfirst); - if subfirst.maybe_empty - || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrMore - || seq_rep.kleene.op == quoted::KleeneOp::ZeroOrOne - { - // Continue scanning for more first - // tokens, but also make sure we - // restore empty-tracking state. - first.maybe_empty = true; - continue; - } else { - return first; - } - } - } - } - - // we only exit the loop if `tts` was empty or if every - // element of `tts` matches the empty sequence. - assert!(first.maybe_empty); - first - } -} - -// A set of `quoted::TokenTree`s, which may include `TokenTree::Match`s -// (for macro-by-example syntactic variables). It also carries the -// `maybe_empty` flag; that is true if and only if the matcher can -// match an empty token sequence. -// -// The First set is computed on submatchers like `$($a:expr b),* $(c)* d`, -// which has corresponding FIRST = {$a:expr, c, d}. -// Likewise, `$($a:expr b),* $(c)+ d` has FIRST = {$a:expr, c}. -// -// (Notably, we must allow for *-op to occur zero times.) -#[derive(Clone, Debug)] -struct TokenSet { - tokens: Vec, - maybe_empty: bool, -} - -impl TokenSet { - // Returns a set for the empty sequence. - fn empty() -> Self { - TokenSet { tokens: Vec::new(), maybe_empty: true } - } - - // Returns the set `{ tok }` for the single-token (and thus - // non-empty) sequence [tok]. - fn singleton(tok: quoted::TokenTree) -> Self { - TokenSet { tokens: vec![tok], maybe_empty: false } - } - - // Changes self to be the set `{ tok }`. - // Since `tok` is always present, marks self as non-empty. - fn replace_with(&mut self, tok: quoted::TokenTree) { - self.tokens.clear(); - self.tokens.push(tok); - self.maybe_empty = false; - } - - // Changes self to be the empty set `{}`; meant for use when - // the particular token does not matter, but we want to - // record that it occurs. - fn replace_with_irrelevant(&mut self) { - self.tokens.clear(); - self.maybe_empty = false; - } - - // Adds `tok` to the set for `self`, marking sequence as non-empy. - fn add_one(&mut self, tok: quoted::TokenTree) { - if !self.tokens.contains(&tok) { - self.tokens.push(tok); - } - self.maybe_empty = false; - } - - // Adds `tok` to the set for `self`. (Leaves `maybe_empty` flag alone.) - fn add_one_maybe(&mut self, tok: quoted::TokenTree) { - if !self.tokens.contains(&tok) { - self.tokens.push(tok); - } - } - - // Adds all elements of `other` to this. - // - // (Since this is a set, we filter out duplicates.) - // - // If `other` is potentially empty, then preserves the previous - // setting of the empty flag of `self`. If `other` is guaranteed - // non-empty, then `self` is marked non-empty. 
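A toy model of the FIRST computation documented above, using strings in place of token trees and folding `maybe_empty` the same way: a sequence that may repeat zero times contributes its FIRST set and lets the scan continue, while a guaranteed-non-empty item stops it. This sketch deliberately omits the separator contribution and the span-keyed cache.

```rust
enum Tt {
    Tok(&'static str),
    // (contents, may_repeat_zero_times)
    Seq(Vec<Tt>, bool),
}

// Returns (FIRST set, maybe_empty) for `tts`.
fn first(tts: &[Tt]) -> (Vec<&'static str>, bool) {
    let mut set = Vec::new();
    for tt in tts {
        match tt {
            Tt::Tok(s) => {
                set.push(*s);
                return (set, false); // a concrete token: non-empty, stop scanning
            }
            Tt::Seq(inner, zero_ok) => {
                let (sub, sub_empty) = first(inner);
                set.extend(sub);
                if !(*zero_ok || sub_empty) {
                    return (set, false); // sequence guaranteed non-empty
                }
                // otherwise the sequence may be skipped entirely: keep scanning
            }
        }
    }
    (set, true) // everything was skippable
}

fn main() {
    // $($a:expr b),* $(c)* d   =>   FIRST = { $a:expr, c, d }
    let m = [
        Tt::Seq(vec![Tt::Tok("$a:expr"), Tt::Tok("b")], true),
        Tt::Seq(vec![Tt::Tok("c")], true),
        Tt::Tok("d"),
    ];
    assert_eq!(first(&m).0, ["$a:expr", "c", "d"]);
}
```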
- fn add_all(&mut self, other: &Self) { - for tok in &other.tokens { - if !self.tokens.contains(tok) { - self.tokens.push(tok.clone()); - } - } - if !other.maybe_empty { - self.maybe_empty = false; - } - } -} - -// Checks that `matcher` is internally consistent and that it -// can legally be followed by a token `N`, for all `N` in `follow`. -// (If `follow` is empty, then it imposes no constraint on -// the `matcher`.) -// -// Returns the set of NT tokens that could possibly come last in -// `matcher`. (If `matcher` matches the empty sequence, then -// `maybe_empty` will be set to true.) -// -// Requires that `first_sets` is pre-computed for `matcher`; -// see `FirstSets::new`. -fn check_matcher_core( - sess: &ParseSess, - features: &Features, - attrs: &[ast::Attribute], - first_sets: &FirstSets, - matcher: &[quoted::TokenTree], - follow: &TokenSet, -) -> TokenSet { - use quoted::TokenTree; - - let mut last = TokenSet::empty(); - - // 2. For each token and suffix [T, SUFFIX] in M: - // ensure that T can be followed by SUFFIX, and if SUFFIX may be empty, - // then ensure T can also be followed by any element of FOLLOW. - 'each_token: for i in 0..matcher.len() { - let token = &matcher[i]; - let suffix = &matcher[i + 1..]; - - let build_suffix_first = || { - let mut s = first_sets.first(suffix); - if s.maybe_empty { - s.add_all(follow); - } - s - }; - - // (we build `suffix_first` on demand below; you can tell - // which cases are supposed to fall through by looking for the - // initialization of this variable.) - let suffix_first; - - // First, update `last` so that it corresponds to the set - // of NT tokens that might end the sequence `... token`. - match *token { - TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarDecl(..) => { - let can_be_followed_by_any; - if let Err(bad_frag) = has_legal_fragment_specifier(sess, features, attrs, token) { - let msg = format!("invalid fragment specifier `{}`", bad_frag); - sess.span_diagnostic - .struct_span_err(token.span(), &msg) - .help(VALID_FRAGMENT_NAMES_MSG) - .emit(); - // (This eliminates false positives and duplicates - // from error messages.) - can_be_followed_by_any = true; - } else { - can_be_followed_by_any = token_can_be_followed_by_any(token); - } - - if can_be_followed_by_any { - // don't need to track tokens that work with any, - last.replace_with_irrelevant(); - // ... and don't need to check tokens that can be - // followed by anything against SUFFIX. - continue 'each_token; - } else { - last.replace_with(token.clone()); - suffix_first = build_suffix_first(); - } - } - TokenTree::Delimited(span, ref d) => { - let my_suffix = TokenSet::singleton(d.close_tt(span.close)); - check_matcher_core(sess, features, attrs, first_sets, &d.tts, &my_suffix); - // don't track non NT tokens - last.replace_with_irrelevant(); - - // also, we don't need to check delimited sequences - // against SUFFIX - continue 'each_token; - } - TokenTree::Sequence(_, ref seq_rep) => { - suffix_first = build_suffix_first(); - // The trick here: when we check the interior, we want - // to include the separator (if any) as a potential - // (but not guaranteed) element of FOLLOW. So in that - // case, we make a temp copy of suffix and stuff - // delimiter in there. - // - // FIXME: Should I first scan suffix_first to see if - // delimiter is already in it before I go through the - // work of cloning it? But then again, this way I may - // get a "tighter" span? 
-                let mut new;
-                let my_suffix = if let Some(sep) = &seq_rep.separator {
-                    new = suffix_first.clone();
-                    new.add_one_maybe(TokenTree::Token(sep.clone()));
-                    &new
-                } else {
-                    &suffix_first
-                };
-
-                // At this point, `suffix_first` is built, and
-                // `my_suffix` is some TokenSet that we can use
-                // for checking the interior of `seq_rep`.
-                let next =
-                    check_matcher_core(sess, features, attrs, first_sets, &seq_rep.tts, my_suffix);
-                if next.maybe_empty {
-                    last.add_all(&next);
-                } else {
-                    last = next;
-                }
-
-                // the recursive call to check_matcher_core already ran the 'each_last
-                // check below, so we can just keep going forward here.
-                continue 'each_token;
-            }
-        }
-
-        // (`suffix_first` guaranteed initialized once reaching here.)
-
-        // Now `last` holds the complete set of NT tokens that could
-        // end the sequence before SUFFIX. Check that every one works with `suffix`.
-        'each_last: for token in &last.tokens {
-            if let TokenTree::MetaVarDecl(_, name, frag_spec) = *token {
-                for next_token in &suffix_first.tokens {
-                    match is_in_follow(next_token, frag_spec.name) {
-                        IsInFollow::Invalid(msg, help) => {
-                            sess.span_diagnostic
-                                .struct_span_err(next_token.span(), &msg)
-                                .help(help)
-                                .emit();
-                            // don't bother reporting every source of
-                            // conflict for a particular element of `last`.
-                            continue 'each_last;
-                        }
-                        IsInFollow::Yes => {}
-                        IsInFollow::No(possible) => {
-                            let may_be = if last.tokens.len() == 1 && suffix_first.tokens.len() == 1
-                            {
-                                "is"
-                            } else {
-                                "may be"
-                            };
-
-                            let sp = next_token.span();
-                            let mut err = sess.span_diagnostic.struct_span_err(
-                                sp,
-                                &format!(
-                                    "`${name}:{frag}` {may_be} followed by `{next}`, which \
-                                     is not allowed for `{frag}` fragments",
-                                    name = name,
-                                    frag = frag_spec,
-                                    next = quoted_tt_to_string(next_token),
-                                    may_be = may_be
-                                ),
-                            );
-                            err.span_label(
-                                sp,
-                                format!("not allowed after `{}` fragments", frag_spec),
-                            );
-                            let msg = "allowed there are: ";
-                            match possible {
-                                &[] => {}
-                                &[t] => {
-                                    err.note(&format!(
-                                        "only {} is allowed after `{}` fragments",
-                                        t, frag_spec,
-                                    ));
-                                }
-                                ts => {
-                                    err.note(&format!(
-                                        "{}{} or {}",
-                                        msg,
-                                        ts[..ts.len() - 1]
-                                            .iter()
-                                            .map(|s| *s)
-                                            .collect::<Vec<_>>()
-                                            .join(", "),
-                                        ts[ts.len() - 1],
-                                    ));
-                                }
-                            }
-                            err.emit();
-                        }
-                    }
-                }
-            }
-        }
-    }
-    last
-}
-
-fn token_can_be_followed_by_any(tok: &quoted::TokenTree) -> bool {
-    if let quoted::TokenTree::MetaVarDecl(_, _, frag_spec) = *tok {
-        frag_can_be_followed_by_any(frag_spec.name)
-    } else {
-        // (Non NT's can always be followed by anything in matchers.)
-        true
-    }
-}
-
-/// Returns `true` if a fragment of type `frag` can be followed by any sort of
-/// token. We use this (among other things) as a useful approximation
-/// for when `frag` can be followed by a repetition like `$(...)*` or
-/// `$(...)+`. In general, these can be a bit tricky to reason about,
-/// so we adopt a conservative position that says that any fragment
-/// specifier which consumes at most one token tree can be followed by
-/// a fragment specifier (indeed, these fragments can be followed by
-/// ANYTHING without fear of future compatibility hazards).
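A user-level example of the follow-set check above (the macro names are illustrative). The error text follows the format string in the `'each_last` loop:

```rust
// Rejected at definition time: after an `expr` fragment only `=>`, `,`,
// or `;` may follow, so a second fragment directly after it is an error:
//
//     macro_rules! bad {
//         ($a:expr $b:expr) => {};
//     }
//     // error: `$a:expr` is followed by `$b:expr`, which is not allowed
//     // for `expr` fragments
//
// Inserting one of the allowed follow tokens fixes the matcher:
macro_rules! add {
    ($a:expr, $b:expr) => {
        $a + $b
    };
}

fn main() {
    assert_eq!(add!(1, 2), 3);
}
```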
-fn frag_can_be_followed_by_any(frag: Symbol) -> bool {
-    match frag {
-        sym::item | // always terminated by `}` or `;`
-        sym::block | // exactly one token tree
-        sym::ident | // exactly one token tree
-        sym::literal | // exactly one token tree
-        sym::meta | // exactly one token tree
-        sym::lifetime | // exactly one token tree
-        sym::tt => // exactly one token tree
-            true,
-
-        _ =>
-            false,
-    }
-}
-
-enum IsInFollow {
-    Yes,
-    No(&'static [&'static str]),
-    Invalid(String, &'static str),
-}
-
-/// Returns `true` if `frag` can legally be followed by the token `tok`. For
-/// fragments that can consume an unbounded number of tokens, `tok`
-/// must be within a well-defined follow set. This is intended to
-/// guarantee future compatibility: for example, without this rule, if
-/// we expanded `expr` to include a new binary operator, we might
-/// break macros that were relying on that binary operator as a
-/// separator.
-// when changing this do not forget to update doc/book/macros.md!
-fn is_in_follow(tok: &quoted::TokenTree, frag: Symbol) -> IsInFollow {
-    use quoted::TokenTree;
-
-    if let TokenTree::Token(Token { kind: token::CloseDelim(_), .. }) = *tok {
-        // closing a token tree can never be matched by any fragment;
-        // iow, we always require that `(` and `)` match, etc.
-        IsInFollow::Yes
-    } else {
-        match frag {
-            sym::item => {
-                // since items *must* be followed by either a `;` or a `}`, we can
-                // accept anything after them
-                IsInFollow::Yes
-            }
-            sym::block => {
-                // anything can follow block, the braces provide an easy boundary to
-                // maintain
-                IsInFollow::Yes
-            }
-            sym::stmt | sym::expr => {
-                const TOKENS: &[&str] = &["`=>`", "`,`", "`;`"];
-                match tok {
-                    TokenTree::Token(token) => match token.kind {
-                        FatArrow | Comma | Semi => IsInFollow::Yes,
-                        _ => IsInFollow::No(TOKENS),
-                    },
-                    _ => IsInFollow::No(TOKENS),
-                }
-            }
-            sym::pat => {
-                const TOKENS: &[&str] = &["`=>`", "`,`", "`=`", "`|`", "`if`", "`in`"];
-                match tok {
-                    TokenTree::Token(token) => match token.kind {
-                        FatArrow | Comma | Eq | BinOp(token::Or) => IsInFollow::Yes,
-                        Ident(name, false) if name == kw::If || name == kw::In => IsInFollow::Yes,
-                        _ => IsInFollow::No(TOKENS),
-                    },
-                    _ => IsInFollow::No(TOKENS),
-                }
-            }
-            sym::path | sym::ty => {
-                const TOKENS: &[&str] = &[
-                    "`{`", "`[`", "`=>`", "`,`", "`>`", "`=`", "`:`", "`;`", "`|`", "`as`",
-                    "`where`",
-                ];
-                match tok {
-                    TokenTree::Token(token) => match token.kind {
-                        OpenDelim(token::DelimToken::Brace)
-                        | OpenDelim(token::DelimToken::Bracket)
-                        | Comma
-                        | FatArrow
-                        | Colon
-                        | Eq
-                        | Gt
-                        | BinOp(token::Shr)
-                        | Semi
-                        | BinOp(token::Or) => IsInFollow::Yes,
-                        Ident(name, false) if name == kw::As || name == kw::Where => {
-                            IsInFollow::Yes
-                        }
-                        _ => IsInFollow::No(TOKENS),
-                    },
-                    TokenTree::MetaVarDecl(_, _, frag) if frag.name == sym::block => {
-                        IsInFollow::Yes
-                    }
-                    _ => IsInFollow::No(TOKENS),
-                }
-            }
-            sym::ident | sym::lifetime => {
-                // being a single token, idents and lifetimes are harmless
-                IsInFollow::Yes
-            }
-            sym::literal => {
-                // literals may be of a single token, or two tokens (negative numbers)
-                IsInFollow::Yes
-            }
-            sym::meta | sym::tt => {
-                // being either a single token or a delimited sequence, tt is
-                // harmless
-                IsInFollow::Yes
-            }
-            sym::vis => {
-                // Explicitly disallow `priv`, on the off chance it comes back.
-                const TOKENS: &[&str] = &["`,`", "an ident", "a type"];
-                match tok {
-                    TokenTree::Token(token) => match token.kind {
-                        Comma => IsInFollow::Yes,
-                        Ident(name, is_raw) if is_raw || name != kw::Priv => IsInFollow::Yes,
-                        _ => {
-                            if token.can_begin_type() {
-                                IsInFollow::Yes
-                            } else {
-                                IsInFollow::No(TOKENS)
-                            }
-                        }
-                    },
-                    TokenTree::MetaVarDecl(_, _, frag)
-                        if frag.name == sym::ident
-                            || frag.name == sym::ty
-                            || frag.name == sym::path =>
-                    {
-                        IsInFollow::Yes
-                    }
-                    _ => IsInFollow::No(TOKENS),
-                }
-            }
-            kw::Invalid => IsInFollow::Yes,
-            _ => IsInFollow::Invalid(
-                format!("invalid fragment specifier `{}`", frag),
-                VALID_FRAGMENT_NAMES_MSG,
-            ),
-        }
-    }
-}
-
-fn has_legal_fragment_specifier(
-    sess: &ParseSess,
-    features: &Features,
-    attrs: &[ast::Attribute],
-    tok: &quoted::TokenTree,
-) -> Result<(), String> {
-    debug!("has_legal_fragment_specifier({:?})", tok);
-    if let quoted::TokenTree::MetaVarDecl(_, _, ref frag_spec) = *tok {
-        let frag_span = tok.span();
-        if !is_legal_fragment_specifier(sess, features, attrs, frag_spec.name, frag_span) {
-            return Err(frag_spec.to_string());
-        }
-    }
-    Ok(())
-}
-
-fn is_legal_fragment_specifier(
-    _sess: &ParseSess,
-    _features: &Features,
-    _attrs: &[ast::Attribute],
-    frag_name: Symbol,
-    _frag_span: Span,
-) -> bool {
-    /*
-     * If new fragment specifiers are invented in nightly, `_sess`,
-     * `_features`, `_attrs`, and `_frag_span` will be useful here
-     * for checking against feature gates. See past versions of
-     * this function.
-     */
-    match frag_name {
-        sym::item
-        | sym::block
-        | sym::stmt
-        | sym::expr
-        | sym::pat
-        | sym::lifetime
-        | sym::path
-        | sym::ty
-        | sym::ident
-        | sym::meta
-        | sym::tt
-        | sym::vis
-        | sym::literal
-        | kw::Invalid => true,
-        _ => false,
-    }
-}
-
-fn quoted_tt_to_string(tt: &quoted::TokenTree) -> String {
-    match *tt {
-        quoted::TokenTree::Token(ref token) => crate::print::pprust::token_to_string(&token),
-        quoted::TokenTree::MetaVar(_, name) => format!("${}", name),
-        quoted::TokenTree::MetaVarDecl(_, name, kind) => format!("${}:{}", name, kind),
-        _ => panic!(
-            "unexpected quoted::TokenTree::{{Sequence or Delimited}} \
-             in follow set checker"
-        ),
-    }
-}
diff --git a/src/libsyntax/ext/tt/quoted.rs b/src/libsyntax/ext/tt/quoted.rs
deleted file mode 100644
index d161e6638bf..00000000000
--- a/src/libsyntax/ext/tt/quoted.rs
+++ /dev/null
@@ -1,433 +0,0 @@
-use crate::ast;
-use crate::ast::NodeId;
-use crate::ext::tt::macro_parser;
-use crate::feature_gate::Features;
-use crate::parse::token::{self, Token, TokenKind};
-use crate::parse::ParseSess;
-use crate::print::pprust;
-use crate::symbol::kw;
-use crate::tokenstream::{self, DelimSpan};
-
-use syntax_pos::{edition::Edition, BytePos, Span};
-
-use rustc_data_structures::sync::Lrc;
-use std::iter::Peekable;
-
-/// Contains the sub-token-trees of a "delimited" token tree, such as the contents of `(`. Note
-/// that the delimiter itself might be `NoDelim`.
-#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug)]
-crate struct Delimited {
-    crate delim: token::DelimToken,
-    crate tts: Vec<TokenTree>,
-}
-
-impl Delimited {
-    /// Returns a `self::TokenTree` with a `Span` corresponding to the opening delimiter.
-    crate fn open_tt(&self, span: Span) -> TokenTree {
-        let open_span = if span.is_dummy() {
-            span
-        } else {
-            span.with_hi(span.lo() + BytePos(self.delim.len() as u32))
-        };
-        TokenTree::token(token::OpenDelim(self.delim), open_span)
-    }
-
-    /// Returns a `self::TokenTree` with a `Span` corresponding to the closing delimiter.
-    crate fn close_tt(&self, span: Span) -> TokenTree {
-        let close_span = if span.is_dummy() {
-            span
-        } else {
-            span.with_lo(span.hi() - BytePos(self.delim.len() as u32))
-        };
-        TokenTree::token(token::CloseDelim(self.delim), close_span)
-    }
-}
-
-#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug)]
-crate struct SequenceRepetition {
-    /// The sequence of token trees
-    crate tts: Vec<TokenTree>,
-    /// The optional separator
-    crate separator: Option<Token>,
-    /// Whether the sequence can be repeated zero (*), or one or more times (+)
-    crate kleene: KleeneToken,
-    /// The number of `Match`s that appear in the sequence (and subsequences)
-    crate num_captures: usize,
-}
-
-#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Debug, Copy)]
-crate struct KleeneToken {
-    crate span: Span,
-    crate op: KleeneOp,
-}
-
-impl KleeneToken {
-    crate fn new(op: KleeneOp, span: Span) -> KleeneToken {
-        KleeneToken { span, op }
-    }
-}
-
-/// A Kleene-style [repetition operator](http://en.wikipedia.org/wiki/Kleene_star)
-/// for token sequences.
-#[derive(Clone, PartialEq, RustcEncodable, RustcDecodable, Hash, Debug, Copy)]
-crate enum KleeneOp {
-    /// Kleene star (`*`) for zero or more repetitions
-    ZeroOrMore,
-    /// Kleene plus (`+`) for one or more repetitions
-    OneOrMore,
-    /// Kleene optional (`?`) for zero or one repetitions
-    ZeroOrOne,
-}
-
-/// Similar to `tokenstream::TokenTree`, except that `$i`, `$i:ident`, and `$(...)`
-/// are "first-class" token trees. Useful for parsing macros.
-#[derive(Debug, Clone, PartialEq, RustcEncodable, RustcDecodable)]
-crate enum TokenTree {
-    Token(Token),
-    Delimited(DelimSpan, Lrc<Delimited>),
-    /// A kleene-style repetition sequence
-    Sequence(DelimSpan, Lrc<SequenceRepetition>),
-    /// e.g., `$var`
-    MetaVar(Span, ast::Ident),
-    /// e.g., `$var:expr`. This is only used in the left hand side of MBE macros.
-    MetaVarDecl(
-        Span,
-        ast::Ident, /* name to bind */
-        ast::Ident, /* kind of nonterminal */
-    ),
-}
-
-impl TokenTree {
-    /// Return the number of tokens in the tree.
-    crate fn len(&self) -> usize {
-        match *self {
-            TokenTree::Delimited(_, ref delimed) => match delimed.delim {
-                token::NoDelim => delimed.tts.len(),
-                _ => delimed.tts.len() + 2,
-            },
-            TokenTree::Sequence(_, ref seq) => seq.tts.len(),
-            _ => 0,
-        }
-    }
-
-    /// Returns `true` if the given token tree is delimited.
-    crate fn is_delimited(&self) -> bool {
-        match *self {
-            TokenTree::Delimited(..) => true,
-            _ => false,
-        }
-    }
-
-    /// Returns `true` if the given token tree is a token of the given kind.
-    crate fn is_token(&self, expected_kind: &TokenKind) -> bool {
-        match self {
-            TokenTree::Token(Token { kind: actual_kind, .. }) => actual_kind == expected_kind,
-            _ => false,
-        }
-    }
-
-    /// Gets the `index`-th sub-token-tree. This only makes sense for delimited trees and sequences.
-    crate fn get_tt(&self, index: usize) -> TokenTree {
-        match (self, index) {
-            (&TokenTree::Delimited(_, ref delimed), _) if delimed.delim == token::NoDelim => {
-                delimed.tts[index].clone()
-            }
-            (&TokenTree::Delimited(span, ref delimed), _) => {
-                if index == 0 {
-                    return delimed.open_tt(span.open);
-                }
-                if index == delimed.tts.len() + 1 {
-                    return delimed.close_tt(span.close);
-                }
-                delimed.tts[index - 1].clone()
-            }
-            (&TokenTree::Sequence(_, ref seq), _) => seq.tts[index].clone(),
-            _ => panic!("Cannot expand a token tree"),
-        }
-    }
-
-    /// Retrieves the `TokenTree`'s span.
-    crate fn span(&self) -> Span {
-        match *self {
-            TokenTree::Token(Token { span, ..
}) - | TokenTree::MetaVar(span, _) - | TokenTree::MetaVarDecl(span, _, _) => span, - TokenTree::Delimited(span, _) | TokenTree::Sequence(span, _) => span.entire(), - } - } - - crate fn token(kind: TokenKind, span: Span) -> TokenTree { - TokenTree::Token(Token::new(kind, span)) - } -} - -/// Takes a `tokenstream::TokenStream` and returns a `Vec`. Specifically, this -/// takes a generic `TokenStream`, such as is used in the rest of the compiler, and returns a -/// collection of `TokenTree` for use in parsing a macro. -/// -/// # Parameters -/// -/// - `input`: a token stream to read from, the contents of which we are parsing. -/// - `expect_matchers`: `parse` can be used to parse either the "patterns" or the "body" of a -/// macro. Both take roughly the same form _except_ that in a pattern, metavars are declared with -/// their "matcher" type. For example `$var:expr` or `$id:ident`. In this example, `expr` and -/// `ident` are "matchers". They are not present in the body of a macro rule -- just in the -/// pattern, so we pass a parameter to indicate whether to expect them or not. -/// - `sess`: the parsing session. Any errors will be emitted to this session. -/// - `features`, `attrs`: language feature flags and attributes so that we know whether to use -/// unstable features or not. -/// - `edition`: which edition are we in. -/// - `macro_node_id`: the NodeId of the macro we are parsing. -/// -/// # Returns -/// -/// A collection of `self::TokenTree`. There may also be some errors emitted to `sess`. -crate fn parse( - input: tokenstream::TokenStream, - expect_matchers: bool, - sess: &ParseSess, - features: &Features, - attrs: &[ast::Attribute], - edition: Edition, - macro_node_id: NodeId, -) -> Vec { - // Will contain the final collection of `self::TokenTree` - let mut result = Vec::new(); - - // For each token tree in `input`, parse the token into a `self::TokenTree`, consuming - // additional trees if need be. - let mut trees = input.trees().peekable(); - while let Some(tree) = trees.next() { - // Given the parsed tree, if there is a metavar and we are expecting matchers, actually - // parse out the matcher (i.e., in `$id:ident` this would parse the `:` and `ident`). - let tree = parse_tree( - tree, - &mut trees, - expect_matchers, - sess, - features, - attrs, - edition, - macro_node_id, - ); - match tree { - TokenTree::MetaVar(start_sp, ident) if expect_matchers => { - let span = match trees.next() { - Some(tokenstream::TokenTree::Token(Token { kind: token::Colon, span })) => { - match trees.next() { - Some(tokenstream::TokenTree::Token(token)) => match token.ident() { - Some((kind, _)) => { - let span = token.span.with_lo(start_sp.lo()); - result.push(TokenTree::MetaVarDecl(span, ident, kind)); - continue; - } - _ => token.span, - }, - tree => tree.as_ref().map(tokenstream::TokenTree::span).unwrap_or(span), - } - } - tree => tree.as_ref().map(tokenstream::TokenTree::span).unwrap_or(start_sp), - }; - sess.missing_fragment_specifiers.borrow_mut().insert(span); - result.push(TokenTree::MetaVarDecl(span, ident, ast::Ident::invalid())); - } - - // Not a metavar or no matchers allowed, so just return the tree - _ => result.push(tree), - } - } - result -} - -/// Takes a `tokenstream::TokenTree` and returns a `self::TokenTree`. Specifically, this takes a -/// generic `TokenTree`, such as is used in the rest of the compiler, and returns a `TokenTree` -/// for use in parsing a macro. -/// -/// Converting the given tree may involve reading more tokens. 
-/// -/// # Parameters -/// -/// - `tree`: the tree we wish to convert. -/// - `trees`: an iterator over trees. We may need to read more tokens from it in order to finish -/// converting `tree` -/// - `expect_matchers`: same as for `parse` (see above). -/// - `sess`: the parsing session. Any errors will be emitted to this session. -/// - `features`, `attrs`: language feature flags and attributes so that we know whether to use -/// unstable features or not. -fn parse_tree( - tree: tokenstream::TokenTree, - trees: &mut Peekable>, - expect_matchers: bool, - sess: &ParseSess, - features: &Features, - attrs: &[ast::Attribute], - edition: Edition, - macro_node_id: NodeId, -) -> TokenTree { - // Depending on what `tree` is, we could be parsing different parts of a macro - match tree { - // `tree` is a `$` token. Look at the next token in `trees` - tokenstream::TokenTree::Token(Token { kind: token::Dollar, span }) => match trees.next() { - // `tree` is followed by a delimited set of token trees. This indicates the beginning - // of a repetition sequence in the macro (e.g. `$(pat)*`). - Some(tokenstream::TokenTree::Delimited(span, delim, tts)) => { - // Must have `(` not `{` or `[` - if delim != token::Paren { - let tok = pprust::token_kind_to_string(&token::OpenDelim(delim)); - let msg = format!("expected `(`, found `{}`", tok); - sess.span_diagnostic.span_err(span.entire(), &msg); - } - // Parse the contents of the sequence itself - let sequence = parse( - tts.into(), - expect_matchers, - sess, - features, - attrs, - edition, - macro_node_id, - ); - // Get the Kleene operator and optional separator - let (separator, kleene) = parse_sep_and_kleene_op(trees, span.entire(), sess); - // Count the number of captured "names" (i.e., named metavars) - let name_captures = macro_parser::count_names(&sequence); - TokenTree::Sequence( - span, - Lrc::new(SequenceRepetition { - tts: sequence, - separator, - kleene, - num_captures: name_captures, - }), - ) - } - - // `tree` is followed by an `ident`. This could be `$meta_var` or the `$crate` special - // metavariable that names the crate of the invocation. - Some(tokenstream::TokenTree::Token(token)) if token.is_ident() => { - let (ident, is_raw) = token.ident().unwrap(); - let span = ident.span.with_lo(span.lo()); - if ident.name == kw::Crate && !is_raw { - TokenTree::token(token::Ident(kw::DollarCrate, is_raw), span) - } else { - TokenTree::MetaVar(span, ident) - } - } - - // `tree` is followed by a random token. This is an error. - Some(tokenstream::TokenTree::Token(token)) => { - let msg = - format!("expected identifier, found `{}`", pprust::token_to_string(&token),); - sess.span_diagnostic.span_err(token.span, &msg); - TokenTree::MetaVar(token.span, ast::Ident::invalid()) - } - - // There are no more tokens. Just return the `$` we already have. - None => TokenTree::token(token::Dollar, span), - }, - - // `tree` is an arbitrary token. Keep it. - tokenstream::TokenTree::Token(token) => TokenTree::Token(token), - - // `tree` is the beginning of a delimited set of tokens (e.g., `(` or `{`). We need to - // descend into the delimited set and further parse it. - tokenstream::TokenTree::Delimited(span, delim, tts) => TokenTree::Delimited( - span, - Lrc::new(Delimited { - delim, - tts: parse( - tts.into(), - expect_matchers, - sess, - features, - attrs, - edition, - macro_node_id, - ), - }), - ), - } -} - -/// Takes a token and returns `Some(KleeneOp)` if the token is `+` `*` or `?`. Otherwise, return -/// `None`. 
-fn kleene_op(token: &Token) -> Option<KleeneOp> {
-    match token.kind {
-        token::BinOp(token::Star) => Some(KleeneOp::ZeroOrMore),
-        token::BinOp(token::Plus) => Some(KleeneOp::OneOrMore),
-        token::Question => Some(KleeneOp::ZeroOrOne),
-        _ => None,
-    }
-}
-
-/// Parse the next token tree of the input looking for a KleeneOp. Returns
-///
-/// - Ok(Ok((op, span))) if the next token tree is a KleeneOp
-/// - Ok(Err(tok, span)) if the next token tree is a token but not a KleeneOp
-/// - Err(span) if the next token tree is not a token
-fn parse_kleene_op(
-    input: &mut impl Iterator<Item = tokenstream::TokenTree>,
-    span: Span,
-) -> Result<Result<(KleeneOp, Span), Token>, Span> {
-    match input.next() {
-        Some(tokenstream::TokenTree::Token(token)) => match kleene_op(&token) {
-            Some(op) => Ok(Ok((op, token.span))),
-            None => Ok(Err(token)),
-        },
-        tree => Err(tree.as_ref().map(tokenstream::TokenTree::span).unwrap_or(span)),
-    }
-}
-
-/// Attempt to parse a single Kleene star, possibly with a separator.
-///
-/// For example, in a pattern such as `$(a),*`, `a` is the pattern to be repeated, `,` is the
-/// separator, and `*` is the Kleene operator. This function is specifically concerned with parsing
-/// the last two tokens of such a pattern: namely, the optional separator and the Kleene operator
-/// itself. Note that here we are parsing the _macro_ itself, rather than trying to match some
-/// stream of tokens in an invocation of a macro.
-///
-/// This function will take some input iterator `input` corresponding to `span` and a parsing
-/// session `sess`. If the next one (or possibly two) tokens in `input` correspond to a Kleene
-/// operator and separator, then a tuple with `(separator, KleeneOp)` is returned. Otherwise, an
-/// error with the appropriate span is emitted to `sess` and a dummy value is returned.
-fn parse_sep_and_kleene_op(
-    input: &mut Peekable<impl Iterator<Item = tokenstream::TokenTree>>,
-    span: Span,
-    sess: &ParseSess,
-) -> (Option<Token>, KleeneToken) {
-    // We basically look at two token trees here, denoted as #1 and #2 below
-    let span = match parse_kleene_op(input, span) {
-        // #1 is a `?`, `+`, or `*` KleeneOp
-        Ok(Ok((op, span))) => return (None, KleeneToken::new(op, span)),
-
-        // #1 is a separator followed by #2, a KleeneOp
-        Ok(Err(token)) => match parse_kleene_op(input, token.span) {
-            // #2 is the `?` Kleene op, which does not take a separator (error)
-            Ok(Ok((KleeneOp::ZeroOrOne, span))) => {
-                // Error!
-                sess.span_diagnostic.span_err(
-                    token.span,
-                    "the `?` macro repetition operator does not take a separator",
-                );
-
-                // Return a dummy
-                return (None, KleeneToken::new(KleeneOp::ZeroOrMore, span));
-            }
-
-            // #2 is a KleeneOp :D
-            Ok(Ok((op, span))) => return (Some(token), KleeneToken::new(op, span)),
-
-            // #2 is a random token or not a token at all :(
-            Ok(Err(Token { span, .. })) | Err(span) => span,
-        },
-
-        // #1 is not a token
-        Err(span) => span,
-    };
-
-    // If we ever get to this point, we have experienced an "unexpected token" error
-    sess.span_diagnostic.span_err(span, "expected one of: `*`, `+`, or `?`");
-
-    // Return a dummy
-    (None, KleeneToken::new(KleeneOp::ZeroOrMore, span))
-}
diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs
deleted file mode 100644
index f9c07e3a2e4..00000000000
--- a/src/libsyntax/ext/tt/transcribe.rs
+++ /dev/null
@@ -1,398 +0,0 @@
-use crate::ast::{Ident, Mac};
-use crate::ext::base::ExtCtxt;
-use crate::ext::tt::macro_parser::{MatchedNonterminal, MatchedSeq, NamedMatch};
-use crate::ext::tt::quoted;
-use crate::mut_visit::{self, MutVisitor};
-use crate::parse::token::{self, NtTT, Token};
-use crate::tokenstream::{DelimSpan, TokenStream, TokenTree, TreeAndJoint};
-
-use smallvec::{smallvec, SmallVec};
-
-use errors::pluralise;
-use rustc_data_structures::fx::FxHashMap;
-use rustc_data_structures::sync::Lrc;
-use syntax_pos::hygiene::{ExpnId, Transparency};
-use syntax_pos::Span;
-
-use std::mem;
-
-// A Marker adds the given mark to the syntax context.
-struct Marker(ExpnId, Transparency);
-
-impl MutVisitor for Marker {
-    fn visit_span(&mut self, span: &mut Span) {
-        *span = span.apply_mark(self.0, self.1)
-    }
-
-    fn visit_mac(&mut self, mac: &mut Mac) {
-        mut_visit::noop_visit_mac(mac, self)
-    }
-}
-
-impl Marker {
-    fn visit_delim_span(&mut self, dspan: &mut DelimSpan) {
-        self.visit_span(&mut dspan.open);
-        self.visit_span(&mut dspan.close);
-    }
-}
-
-/// An iterator over the token trees in a delimited token tree (`{ ... }`) or a sequence (`$(...)`).
-enum Frame {
-    Delimited { forest: Lrc<quoted::Delimited>, idx: usize, span: DelimSpan },
-    Sequence { forest: Lrc<quoted::SequenceRepetition>, idx: usize, sep: Option<Token> },
-}
-
-impl Frame {
-    /// Construct a new frame around the delimited set of tokens.
-    fn new(tts: Vec<quoted::TokenTree>) -> Frame {
-        let forest = Lrc::new(quoted::Delimited { delim: token::NoDelim, tts });
-        Frame::Delimited { forest, idx: 0, span: DelimSpan::dummy() }
-    }
-}
-
-impl Iterator for Frame {
-    type Item = quoted::TokenTree;
-
-    fn next(&mut self) -> Option<quoted::TokenTree> {
-        match *self {
-            Frame::Delimited { ref forest, ref mut idx, .. } => {
-                *idx += 1;
-                forest.tts.get(*idx - 1).cloned()
-            }
-            Frame::Sequence { ref forest, ref mut idx, .. } => {
-                *idx += 1;
-                forest.tts.get(*idx - 1).cloned()
-            }
-        }
-    }
-}
-
-/// This can do Macro-By-Example transcription.
-/// - `interp` is a map of meta-variables to the tokens (non-terminals) they matched in the
-///   invocation. We are assuming we already know there is a match.
-/// - `src` is the RHS of the MBE, that is, the "example" we are filling in.
-///
-/// For example,
-///
-/// ```rust
-/// macro_rules! foo {
-///     ($id:ident) => { println!("{}", stringify!($id)); }
-/// }
-///
-/// foo!(bar);
-/// ```
-///
-/// `interp` would contain `$id => bar` and `src` would contain `println!("{}", stringify!($id));`.
-///
-/// `transcribe` would return a `TokenStream` containing `println!("{}", stringify!(bar));`.
-///
-/// Along the way, we do some additional error checking.
-pub(super) fn transcribe(
-    cx: &ExtCtxt<'_>,
-    interp: &FxHashMap<Ident, NamedMatch>,
-    src: Vec<quoted::TokenTree>,
-    transparency: Transparency,
-) -> TokenStream {
-    // Nothing for us to transcribe...
-    if src.is_empty() {
-        return TokenStream::empty();
-    }
-
-    // We descend into the RHS (`src`), expanding things as we go. This stack contains the things
-    // we have yet to expand/are still expanding. We start the stack off with the whole RHS.
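A user-level view of `parse_sep_and_kleene_op`: `?` repetitions parse fine on their own but, per the error emitted above, reject a separator. The macro is illustrative, not compiler code.

```rust
macro_rules! point {
    // `$(...)?` matches the trailing `, $y:expr` zero or one time. Writing a
    // separator before the `?`, as in `$($y:expr),?`, is rejected with
    // "the `?` macro repetition operator does not take a separator".
    ($x:expr $(, $y:expr)?) => {
        ($x, 0 $(+ $y)?)
    };
}

fn main() {
    assert_eq!(point!(1), (1, 0));
    assert_eq!(point!(1, 5), (1, 5));
}
```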
-    let mut stack: SmallVec<[Frame; 1]> = smallvec![Frame::new(src)];
-
-    // As we descend in the RHS, we will need to be able to match nested sequences of matchers.
-    // `repeats` keeps track of where we are in matching at each level, with the last element being
-    // the most deeply nested sequence. This is used as a stack.
-    let mut repeats = Vec::new();
-
-    // `result` contains resulting token stream from the TokenTree we just finished processing. At
-    // the end, this will contain the full result of transcription, but at arbitrary points during
-    // `transcribe`, `result` will contain subsets of the final result.
-    //
-    // Specifically, as we descend into each TokenTree, we will push the existing results onto the
-    // `result_stack` and clear `results`. We will then produce the results of transcribing the
-    // TokenTree into `results`. Then, as we unwind back out of the `TokenTree`, we will pop the
-    // `result_stack` and append `results` to it to produce the new `results` up to that point.
-    //
-    // Thus, if we try to pop the `result_stack` and it is empty, we have reached the top-level
-    // again, and we are done transcribing.
-    let mut result: Vec<TreeAndJoint> = Vec::new();
-    let mut result_stack = Vec::new();
-    let mut marker = Marker(cx.current_expansion.id, transparency);
-
-    loop {
-        // Look at the last frame on the stack.
-        let tree = if let Some(tree) = stack.last_mut().unwrap().next() {
-            // If it still has a TokenTree we have not looked at yet, use that tree.
-            tree
-        }
-        // The else-case never produces a value for `tree` (it `continue`s or `return`s).
-        else {
-            // Otherwise, if we have just reached the end of a sequence and we can keep repeating,
-            // go back to the beginning of the sequence.
-            if let Frame::Sequence { idx, sep, .. } = stack.last_mut().unwrap() {
-                let (repeat_idx, repeat_len) = repeats.last_mut().unwrap();
-                *repeat_idx += 1;
-                if repeat_idx < repeat_len {
-                    *idx = 0;
-                    if let Some(sep) = sep {
-                        result.push(TokenTree::Token(sep.clone()).into());
-                    }
-                    continue;
-                }
-            }
-
-            // We are done with the top of the stack. Pop it. Depending on what it was, we do
-            // different things. Note that the outermost item must be the delimited, wrapped RHS
-            // that was passed in originally to `transcribe`.
-            match stack.pop().unwrap() {
-                // Done with a sequence. Pop from repeats.
-                Frame::Sequence { .. } => {
-                    repeats.pop();
-                }
-
-                // We are done processing a Delimited. If this is the top-level delimited, we are
-                // done. Otherwise, we unwind the result_stack to append what we have produced to
-                // any previous results.
-                Frame::Delimited { forest, span, .. } => {
-                    if result_stack.is_empty() {
-                        // No results left to compute! We are back at the top-level.
-                        return TokenStream::new(result);
-                    }
-
-                    // Step back into the parent Delimited.
-                    let tree =
-                        TokenTree::Delimited(span, forest.delim, TokenStream::new(result).into());
-                    result = result_stack.pop().unwrap();
-                    result.push(tree.into());
-                }
-            }
-            continue;
-        };
-
-        // At this point, we know we are in the middle of a TokenTree (the last one on `stack`).
-        // `tree` contains the next `TokenTree` to be processed.
-        match tree {
-            // We are descending into a sequence. We first make sure that the matchers in the RHS
-            // and the matches in `interp` have the same shape. Otherwise, either the caller or the
-            // macro writer has made a mistake.
-            seq @ quoted::TokenTree::Sequence(..) => {
-        // At this point, we know we are in the middle of a TokenTree (the last one on `stack`).
-        // `tree` contains the next `TokenTree` to be processed.
-        match tree {
-            // We are descending into a sequence. We first make sure that the matchers in the RHS
-            // and the matches in `interp` have the same shape. Otherwise, either the caller or the
-            // macro writer has made a mistake.
-            seq @ quoted::TokenTree::Sequence(..) => {
-                match lockstep_iter_size(&seq, interp, &repeats) {
-                    LockstepIterSize::Unconstrained => {
-                        cx.span_fatal(
-                            seq.span(), /* blame macro writer */
-                            "attempted to repeat an expression containing no syntax variables \
-                             matched as repeating at this depth",
-                        );
-                    }
-
-                    LockstepIterSize::Contradiction(ref msg) => {
-                        // FIXME: this really ought to be caught at macro definition time... It
-                        // happens when two meta-variables are used in the same repetition in a
-                        // sequence, but they come from different sequence matchers and repeat
-                        // different amounts.
-                        cx.span_fatal(seq.span(), &msg[..]);
-                    }
-
-                    LockstepIterSize::Constraint(len, _) => {
-                        // We do this to avoid an extra clone above. We know that this is a
-                        // sequence already.
-                        let (sp, seq) = if let quoted::TokenTree::Sequence(sp, seq) = seq {
-                            (sp, seq)
-                        } else {
-                            unreachable!()
-                        };
-
-                        // Is the repetition empty?
-                        if len == 0 {
-                            if seq.kleene.op == quoted::KleeneOp::OneOrMore {
-                                // FIXME: this really ought to be caught at macro definition
-                                // time... It happens when the Kleene operator in the matcher and
-                                // the body for the same meta-variable do not match.
-                                cx.span_fatal(sp.entire(), "this must repeat at least once");
-                            }
-                        } else {
-                            // 0 is the initial counter (we have done 0 repetitions so far). `len`
-                            // is the total number of repetitions we should generate.
-                            repeats.push((0, len));
-
-                            // The first time we encounter the sequence we push it to the stack. It
-                            // then gets reused (see the beginning of the loop) until we are done
-                            // repeating.
-                            stack.push(Frame::Sequence {
-                                idx: 0,
-                                sep: seq.separator.clone(),
-                                forest: seq,
-                            });
-                        }
-                    }
-                }
-            }
-
-            // Replace the meta-var with the matched token tree from the invocation.
-            quoted::TokenTree::MetaVar(mut sp, mut ident) => {
-                // Find the matched nonterminal from the macro invocation, and use it to replace
-                // the meta-var.
-                if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
-                    if let MatchedNonterminal(ref nt) = cur_matched {
-                        // FIXME #2887: why do we apply a mark when matching a token tree meta-var
-                        // (e.g. `$x:tt`), but not when we are matching any other type of token
-                        // tree?
-                        if let NtTT(ref tt) = **nt {
-                            result.push(tt.clone().into());
-                        } else {
-                            marker.visit_span(&mut sp);
-                            let token = TokenTree::token(token::Interpolated(nt.clone()), sp);
-                            result.push(token.into());
-                        }
-                    } else {
-                        // We were unable to descend far enough. This is an error.
-                        cx.span_fatal(
-                            sp, /* blame the macro writer */
-                            &format!("variable '{}' is still repeating at this depth", ident),
-                        );
-                    }
-                } else {
-                    // If we aren't able to match the meta-var, we push it back into the result but
-                    // with modified syntax context. (I believe this supports nested macros).
-                    marker.visit_span(&mut sp);
-                    marker.visit_ident(&mut ident);
-                    result.push(TokenTree::token(token::Dollar, sp).into());
-                    result.push(TokenTree::Token(Token::from_ast_ident(ident)).into());
-                }
-            }
-
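[Editor's note: both `span_fatal` cases in the sequence arm above are reachable from ordinary macro definitions. A sketch with made-up macro names; the error wording is quoted from the code above, and both errors are reported only when the offending branch is actually expanded.]

```rust
// `LockstepIterSize::Unconstrained` at sequence depth: the RHS repetition
// contains no meta-variable that was matched as repeating, so nothing
// determines how many times to loop.
macro_rules! no_driver {
    ($x:expr) => { $( $x; )* };
}

// `LockstepIterSize::Contradiction`: `$a` and `$b` repeat in the same RHS
// sequence but were matched with different lengths.
macro_rules! mismatched {
    ($($a:ident)* ; $($b:ident)*) => { $( fn $a() {} fn $b() {} )* };
}

fn main() {
    no_driver!(0);        // error: attempted to repeat an expression containing
                          //        no syntax variables matched as repeating at
                          //        this depth
    mismatched!(x y ; z); // error: meta-variable `a` repeats 2 times, but `b`
                          //        repeats 1 time
}
```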
-            // If we are entering a new delimiter, we push its contents to the `stack` to be
-            // processed, and we push all of the currently produced results to the `result_stack`.
-            // We will produce all of the results of the inside of the `Delimited` and then we will
-            // jump back out of the Delimited, pop the result_stack and add the new results back to
-            // the previous results (from outside the Delimited).
-            quoted::TokenTree::Delimited(mut span, delimited) => {
-                marker.visit_delim_span(&mut span);
-                stack.push(Frame::Delimited { forest: delimited, idx: 0, span });
-                result_stack.push(mem::take(&mut result));
-            }
-
-            // Nothing much to do here. Just push the token to the result, being careful to
-            // preserve syntax context.
-            quoted::TokenTree::Token(token) => {
-                let mut tt = TokenTree::Token(token);
-                marker.visit_tt(&mut tt);
-                result.push(tt.into());
-            }
-
-            // There should be no meta-var declarations in the invocation of a macro.
-            quoted::TokenTree::MetaVarDecl(..) => panic!("unexpected `TokenTree::MetaVarDecl`"),
-        }
-    }
-}
-
-/// Look up the meta-var named `ident` and return the matched token tree from the invocation using
-/// the set of matches `interpolations`.
-///
-/// See the definition of `repeats` in the `transcribe` function. `repeats` is used to descend
-/// into the right place in nested matchers. If we attempt to descend too far, the macro writer has
-/// made a mistake, and we return `None`.
-fn lookup_cur_matched<'a>(
-    ident: Ident,
-    interpolations: &'a FxHashMap<Ident, NamedMatch>,
-    repeats: &[(usize, usize)],
-) -> Option<&'a NamedMatch> {
-    interpolations.get(&ident).map(|matched| {
-        let mut matched = matched;
-        for &(idx, _) in repeats {
-            match matched {
-                MatchedNonterminal(_) => break,
-                MatchedSeq(ref ads, _) => matched = ads.get(idx).unwrap(),
-            }
-        }
-
-        matched
-    })
-}
-
-/// An accumulator over a TokenTree to be used with `fold`. During transcription, we need to make
-/// sure that the size of each sequence and all of its nested sequences are the same as the sizes
-/// of all the matched (nested) sequences in the macro invocation. If they don't match, somebody
-/// has made a mistake (either the macro writer or caller).
-#[derive(Clone)]
-enum LockstepIterSize {
-    /// No constraints on length of matcher. This is true for any TokenTree variants except a
-    /// `MetaVar` with an actual `MatchedSeq` (as opposed to a `MatchedNonterminal`).
-    Unconstrained,
-
-    /// A `MetaVar` with an actual `MatchedSeq`. The length of the match and the name of the
-    /// meta-var are returned.
-    Constraint(usize, Ident),
-
-    /// Two `Constraint`s on the same sequence had different lengths. This is an error.
-    Contradiction(String),
-}
-
-impl LockstepIterSize {
-    /// Find incompatibilities in matcher/invocation sizes.
-    /// - `Unconstrained` is compatible with everything.
-    /// - `Contradiction` is incompatible with everything.
-    /// - `Constraint(len)` is only compatible with other constraints of the same length.
-    fn with(self, other: LockstepIterSize) -> LockstepIterSize {
-        match self {
-            LockstepIterSize::Unconstrained => other,
-            LockstepIterSize::Contradiction(_) => self,
-            LockstepIterSize::Constraint(l_len, ref l_id) => match other {
-                LockstepIterSize::Unconstrained => self,
-                LockstepIterSize::Contradiction(_) => other,
-                LockstepIterSize::Constraint(r_len, _) if l_len == r_len => self,
-                LockstepIterSize::Constraint(r_len, r_id) => {
-                    let msg = format!(
-                        "meta-variable `{}` repeats {} time{}, but `{}` repeats {} time{}",
-                        l_id,
-                        l_len,
-                        pluralise!(l_len),
-                        r_id,
-                        r_len,
-                        pluralise!(r_len),
-                    );
-                    LockstepIterSize::Contradiction(msg)
-                }
-            },
-        }
-    }
-}
-
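[Editor's note: to make the `repeats`-driven descent in `lookup_cur_matched` concrete, here is a hedged model; `Match` and `descend` are invented stand-ins for `NamedMatch` and the loop above, not real libsyntax API.]

```rust
/// Stand-in for `NamedMatch`: a leaf is a single matched fragment, a `Seq`
/// is one level of `$(...)` repetition.
#[derive(Debug)]
enum Match {
    Leaf(&'static str),
    Seq(Vec<Match>),
}

/// Walk one level deeper per `(idx, _)` pair, like `lookup_cur_matched`:
/// stop early at a leaf, otherwise pick the `idx`-th repetition at this depth.
fn descend<'a>(mut matched: &'a Match, repeats: &[(usize, usize)]) -> &'a Match {
    for &(idx, _) in repeats {
        match matched {
            Match::Leaf(_) => break,
            Match::Seq(seq) => matched = &seq[idx],
        }
    }
    matched
}

fn main() {
    // Matches for `$( $( $x:ident )* )*` with two outer repetitions of two
    // inner ones each.
    let m = Match::Seq(vec![
        Match::Seq(vec![Match::Leaf("a"), Match::Leaf("b")]),
        Match::Seq(vec![Match::Leaf("c"), Match::Leaf("d")]),
    ]);
    // Currently at outer repetition 1, inner repetition 0, so this is "c".
    println!("{:?}", descend(&m, &[(1, 2), (0, 2)]));
}
```

Stopping early at a leaf is what lets a meta-variable bound outside a repetition be used inside one: it resolves to the same fragment on every iteration.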
-/// Given a `tree`, make sure that all sequences have the same length as the matches for the
-/// appropriate meta-vars in `interpolations`.
-///
-/// Note that if `repeats` does not match the exact correct depth of a meta-var,
-/// `lookup_cur_matched` will return `None`, which is why this still works even in the presence of
-/// multiple nested matcher sequences.
-fn lockstep_iter_size(
-    tree: &quoted::TokenTree,
-    interpolations: &FxHashMap<Ident, NamedMatch>,
-    repeats: &[(usize, usize)],
-) -> LockstepIterSize {
-    use quoted::TokenTree;
-    match *tree {
-        TokenTree::Delimited(_, ref delimed) => {
-            delimed.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
-                size.with(lockstep_iter_size(tt, interpolations, repeats))
-            })
-        }
-        TokenTree::Sequence(_, ref seq) => {
-            seq.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
-                size.with(lockstep_iter_size(tt, interpolations, repeats))
-            })
-        }
-        TokenTree::MetaVar(_, name) | TokenTree::MetaVarDecl(_, name, _) => {
-            match lookup_cur_matched(name, interpolations, repeats) {
-                Some(matched) => match matched {
-                    MatchedNonterminal(_) => LockstepIterSize::Unconstrained,
-                    MatchedSeq(ref ads, _) => LockstepIterSize::Constraint(ads.len(), name),
-                },
-                _ => LockstepIterSize::Unconstrained,
-            }
-        }
-        TokenTree::Token(..) => LockstepIterSize::Unconstrained,
-    }
-}
--
cgit 1.4.1-3-g733a5