From 6680c9c5c797101fc5e0608cb2c3657517333148 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Fri, 2 Jan 2015 16:41:24 -0500 Subject: syntax: implement 'macro input future proofing' See RFC 550 (https://github.com/rust-lang/rfcs/pull/550) for the motivation and details. If this breaks your code, add one of the listed tokens after the relevant non-terminal in your matcher. [breaking-change] --- src/libsyntax/ext/tt/macro_rules.rs | 148 ++++++++++++++++++++++++++++++++++-- 1 file changed, 140 insertions(+), 8 deletions(-) (limited to 'src/libsyntax/ext') diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 9837c8088fa..96a0f7de0fd 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -1,4 +1,4 @@ -// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use ast::{Ident, TtDelimited, TtSequence, TtToken}; +use ast::{TokenTree, TtDelimited, TtSequence, TtToken}; use ast; use codemap::{Span, DUMMY_SP}; use ext::base::{ExtCtxt, MacResult, SyntaxExtension}; @@ -19,8 +19,8 @@ use ext::tt::macro_parser::{parse, parse_or_else}; use parse::lexer::new_tt_reader; use parse::parser::Parser; use parse::attr::ParserAttr; -use parse::token::{special_idents, gensym_ident}; -use parse::token::{MatchNt, NtTT}; +use parse::token::{special_idents, gensym_ident, NtTT, Token}; +use parse::token::Token::*; use parse::token; use print; use ptr::P; @@ -109,8 +109,8 @@ impl<'a> MacResult for ParserAnyMacro<'a> { } struct MacroRulesMacroExpander { - name: Ident, - imported_from: Option, + name: ast::Ident, + imported_from: Option, lhses: Vec>, rhses: Vec>, } @@ -134,8 +134,8 @@ impl TTMacroExpander for MacroRulesMacroExpander { /// Given `lhses` and `rhses`, this is the new macro we create fn generic_extension<'cx>(cx: &'cx ExtCtxt, sp: Span, - name: Ident, - imported_from: Option, + name: ast::Ident, + imported_from: Option, arg: &[ast::TokenTree], lhses: &[Rc], rhses: &[Rc]) @@ -260,6 +260,10 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt, _ => cx.span_bug(def.span, "wrong-structured lhs") }; + for lhs in lhses.iter() { + check_lhs_nt_follows(cx, &**lhs, def.span); + } + let rhses = match *argument_map[rhs_nm] { MatchedSeq(ref s, _) => /* FIXME (#2543) */ (*s).clone(), _ => cx.span_bug(def.span, "wrong-structured rhs") @@ -274,3 +278,131 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt, NormalTT(exp, Some(def.span)) } + +fn check_lhs_nt_follows(cx: &mut ExtCtxt, lhs: &NamedMatch, sp: Span) { + // lhs is going to be like MatchedNonterminal(NtTT(TtDelimited(...))), where + // the entire lhs is those tts. + // if ever we get box/deref patterns, this could turn into an `if let + // &MatchedNonterminal(NtTT(box TtDelimited(...))) = lhs` + let matcher = match lhs { + &MatchedNonterminal(NtTT(ref inner)) => match &**inner { + &TtDelimited(_, ref tts) => tts.tts[], + _ => cx.span_bug(sp, "wrong-structured lhs for follow check") + }, + _ => cx.span_bug(sp, "wrong-structured lhs for follow check") + }; + + check_matcher(cx, matcher, &Eof); + // we don't abort on errors on rejection, the driver will do that for us + // after parsing/expansion. we can report every error in every macro this way. +} + +fn check_matcher(cx: &mut ExtCtxt, matcher: &[TokenTree], follow: &Token) { + use print::pprust::token_to_string; + + // 1. If there are no tokens in M, accept + if matcher.is_empty() { + return; + } + + // 2. For each token T in M: + let mut tokens = matcher.iter().peekable(); + while let Some(token) = tokens.next() { + match *token { + TtToken(sp, MatchNt(ref name, ref frag_spec, _, _)) => { + // ii. If T is a simple NT, look ahead to the next token T' in + // M. + let next_token = match tokens.peek() { + // If T' closes a complex NT, replace T' with F + Some(&&TtToken(_, CloseDelim(_))) => follow, + Some(&&TtToken(_, ref tok)) => tok, + // T' is any NT (this catches complex NTs, the next + // iteration will die if it's a TtDelimited). + Some(_) => continue, + // else, we're at the end of the macro or sequence + None => follow + }; + + // If T' is in the set FOLLOW(NT), continue. Else, reject. + match *next_token { + Eof | MatchNt(..) => continue, + _ if is_in_follow(cx, next_token, frag_spec.as_str()) => continue, + ref tok => cx.span_err(sp, format!("`${0}:{1}` is followed by `{2}`, which \ + is not allowed for `{1}` fragments", + name.as_str(), frag_spec.as_str(), + token_to_string(tok))[]) + } + }, + TtSequence(_, ref seq) => { + // iii. Else, T is a complex NT. + match seq.separator { + // If T has the form $(...)U+ or $(...)U* for some token U, + // run the algorithm on the contents with F set to U. If it + // accepts, continue, else, reject. + Some(ref u) => check_matcher(cx, seq.tts[], u), + // If T has the form $(...)+ or $(...)*, run the algorithm + // on the contents with F set to EOF. If it accepts, + // continue, else, reject. + None => check_matcher(cx, seq.tts[], &Eof) + } + }, + TtToken(..) => { + // i. If T is not an NT, continue. + continue + }, + TtDelimited(_, ref tts) => { + // if we don't pass in that close delimiter, we'll incorrectly consider the matcher + // `{ $foo:ty }` as having a follow that isn't `}` + check_matcher(cx, tts.tts[], &tts.close_token()) + } + } + } +} + +fn is_in_follow(cx: &ExtCtxt, tok: &Token, frag: &str) -> bool { + if let &CloseDelim(_) = tok { + return true; + } + + match frag { + "item" => { + // since items *must* be followed by either a `;` or a `}`, we can + // accept anything after them + true + }, + "block" => { + // anything can follow block, the braces provide a easy boundary to + // maintain + true + }, + "stmt" | "expr" => { + match *tok { + Comma | Semi => true, + _ => false + } + }, + "pat" => { + match *tok { + FatArrow | Comma | Eq => true, + _ => false + } + }, + "path" | "ty" => { + match *tok { + Comma | RArrow | Colon | Eq | Gt => true, + Ident(i, _) if i.as_str() == "as" => true, + _ => false + } + }, + "ident" => { + // being a single token, idents are harmless + true + }, + "meta" | "tt" => { + // being either a single token or a delimited sequence, tt is + // harmless + true + }, + _ => cx.bug(format!("unrecognized builtin nonterminal {}", frag)[]), + } +} -- cgit 1.4.1-3-g733a5 From ac8e10519a298cdad3acb50506af3eec79995729 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Tue, 6 Jan 2015 18:02:00 -0500 Subject: Stricter rules surrounding adjacent nonterminals and sequences --- src/libcore/macros.rs | 5 +- src/libsyntax/ext/tt/macro_rules.rs | 103 +++++++++++++++------ .../compile-fail/macro-input-future-proofing.rs | 4 + 3 files changed, 82 insertions(+), 30 deletions(-) (limited to 'src/libsyntax/ext') diff --git a/src/libcore/macros.rs b/src/libcore/macros.rs index a579f9db416..14e0be2cf16 100644 --- a/src/libcore/macros.rs +++ b/src/libcore/macros.rs @@ -186,8 +186,11 @@ macro_rules! write { #[macro_export] #[stable] macro_rules! writeln { - ($dst:expr, $fmt:expr $($arg:tt)*) => ( + ($dst:expr, $fmt:expr, $($arg:tt)*) => ( write!($dst, concat!($fmt, "\n") $($arg)*) + ); + ($dst:expr, $fmt:expr) => ( + write!($dst, concat!($fmt, "\n")) ) } diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 96a0f7de0fd..9e1b18ad18a 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -292,58 +292,102 @@ fn check_lhs_nt_follows(cx: &mut ExtCtxt, lhs: &NamedMatch, sp: Span) { _ => cx.span_bug(sp, "wrong-structured lhs for follow check") }; - check_matcher(cx, matcher, &Eof); + check_matcher(cx, matcher.iter(), &Eof); // we don't abort on errors on rejection, the driver will do that for us // after parsing/expansion. we can report every error in every macro this way. } -fn check_matcher(cx: &mut ExtCtxt, matcher: &[TokenTree], follow: &Token) { +// returns the last token that was checked, for TtSequence. this gets used later on. +fn check_matcher<'a, I>(cx: &mut ExtCtxt, matcher: I, follow: &Token) +-> Option<(Span, Token)> where I: Iterator { use print::pprust::token_to_string; - // 1. If there are no tokens in M, accept - if matcher.is_empty() { - return; - } + let mut last = None; // 2. For each token T in M: - let mut tokens = matcher.iter().peekable(); + let mut tokens = matcher.peekable(); while let Some(token) = tokens.next() { - match *token { + last = match *token { TtToken(sp, MatchNt(ref name, ref frag_spec, _, _)) => { // ii. If T is a simple NT, look ahead to the next token T' in // M. let next_token = match tokens.peek() { // If T' closes a complex NT, replace T' with F - Some(&&TtToken(_, CloseDelim(_))) => follow, - Some(&&TtToken(_, ref tok)) => tok, - // T' is any NT (this catches complex NTs, the next - // iteration will die if it's a TtDelimited). - Some(_) => continue, + Some(&&TtToken(_, CloseDelim(_))) => follow.clone(), + Some(&&TtToken(_, ref tok)) => tok.clone(), + Some(&&TtSequence(sp, _)) => { + cx.span_err(sp, format!("`${0}:{1}` is followed by a sequence \ + repetition, which is not allowed for `{1}` \ + fragments", name.as_str(), frag_spec.as_str())[]); + Eof + }, + // die next iteration + Some(&&TtDelimited(_, ref delim)) => delim.close_token(), // else, we're at the end of the macro or sequence - None => follow + None => follow.clone() }; + let tok = if let TtToken(_, ref tok) = *token { tok } else { unreachable!() }; // If T' is in the set FOLLOW(NT), continue. Else, reject. - match *next_token { - Eof | MatchNt(..) => continue, - _ if is_in_follow(cx, next_token, frag_spec.as_str()) => continue, - ref tok => cx.span_err(sp, format!("`${0}:{1}` is followed by `{2}`, which \ - is not allowed for `{1}` fragments", - name.as_str(), frag_spec.as_str(), - token_to_string(tok))[]) + match &next_token { + &Eof => return Some((sp, tok.clone())), + _ if is_in_follow(cx, &next_token, frag_spec.as_str()) => continue, + next => { + cx.span_err(sp, format!("`${0}:{1}` is followed by `{2}`, which \ + is not allowed for `{1}` fragments", + name.as_str(), frag_spec.as_str(), + token_to_string(next))[]); + continue + }, } }, - TtSequence(_, ref seq) => { + TtSequence(sp, ref seq) => { // iii. Else, T is a complex NT. match seq.separator { // If T has the form $(...)U+ or $(...)U* for some token U, // run the algorithm on the contents with F set to U. If it // accepts, continue, else, reject. - Some(ref u) => check_matcher(cx, seq.tts[], u), - // If T has the form $(...)+ or $(...)*, run the algorithm - // on the contents with F set to EOF. If it accepts, - // continue, else, reject. - None => check_matcher(cx, seq.tts[], &Eof) + Some(ref u) => { + let last = check_matcher(cx, seq.tts.iter(), u); + match last { + // Since the delimiter isn't required after the last repetition, make + // sure that the *next* token is sane. This doesn't actually compute + // the FIRST of the rest of the matcher yet, it only considers single + // tokens and simple NTs. This is imprecise, but conservatively + // correct. + Some((span, tok)) => { + let fol = match tokens.peek() { + Some(&&TtToken(_, ref tok)) => tok.clone(), + Some(&&TtDelimited(_, ref delim)) => delim.close_token(), + Some(_) => { + cx.span_err(sp, "sequence repetition followed by \ + another sequence repetition, which is not allowed"); + Eof + }, + None => Eof + }; + check_matcher(cx, Some(&TtToken(span, tok.clone())).into_iter(), + &fol) + }, + None => last, + } + }, + // If T has the form $(...)+ or $(...)*, run the algorithm on the contents with + // F set to the token following the sequence. If it accepts, continue, else, + // reject. + None => { + let fol = match tokens.peek() { + Some(&&TtToken(_, ref tok)) => tok.clone(), + Some(&&TtDelimited(_, ref delim)) => delim.close_token(), + Some(_) => { + cx.span_err(sp, "sequence repetition followed by another \ + sequence repetition, which is not allowed"); + Eof + }, + None => Eof + }; + check_matcher(cx, seq.tts.iter(), &fol) + } } }, TtToken(..) => { @@ -352,11 +396,12 @@ fn check_matcher(cx: &mut ExtCtxt, matcher: &[TokenTree], follow: &Token) { }, TtDelimited(_, ref tts) => { // if we don't pass in that close delimiter, we'll incorrectly consider the matcher - // `{ $foo:ty }` as having a follow that isn't `}` - check_matcher(cx, tts.tts[], &tts.close_token()) + // `{ $foo:ty }` as having a follow that isn't `RBrace` + check_matcher(cx, tts.tts.iter(), &tts.close_token()) } } } + last } fn is_in_follow(cx: &ExtCtxt, tok: &Token, frag: &str) -> bool { diff --git a/src/test/compile-fail/macro-input-future-proofing.rs b/src/test/compile-fail/macro-input-future-proofing.rs index 1f2db624065..15f6d88fd89 100644 --- a/src/test/compile-fail/macro-input-future-proofing.rs +++ b/src/test/compile-fail/macro-input-future-proofing.rs @@ -20,6 +20,10 @@ macro_rules! errors_everywhere { ($pa:pat , ) => (); ($pa:pat | ) => (); //~ ERROR `$pa:pat` is followed by `|` ($pa:pat $pb:pat $ty:ty ,) => (); + //~^ ERROR `$pa:pat` is followed by `$pb:pat`, which is not allowed + //~^^ ERROR `$pb:pat` is followed by `$ty:ty`, which is not allowed + ($($ty:ty)* -) => (); //~ ERROR `$ty:ty` is followed by `-` + ($($a:ty, $b:ty)* -) => (); //~ ERROR `$b:ty` is followed by `-` ($($ty:ty)-+) => (); //~ ERROR `$ty:ty` is followed by `-`, which is not allowed for `ty` } -- cgit 1.4.1-3-g733a5 From bd4119f9654eda89e359234a08b1ac4fae53287c Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Tue, 6 Jan 2015 18:46:37 -0500 Subject: Minor fallout/update FOLLOW sets --- src/libcore/macros.rs | 8 ++++---- src/libstd/rt/macros.rs | 16 ++++++++++++---- src/libsyntax/ext/tt/macro_rules.rs | 4 ++-- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'src/libsyntax/ext') diff --git a/src/libcore/macros.rs b/src/libcore/macros.rs index 14e0be2cf16..fcd93ad4a02 100644 --- a/src/libcore/macros.rs +++ b/src/libcore/macros.rs @@ -186,12 +186,12 @@ macro_rules! write { #[macro_export] #[stable] macro_rules! writeln { - ($dst:expr, $fmt:expr, $($arg:tt)*) => ( - write!($dst, concat!($fmt, "\n") $($arg)*) - ); ($dst:expr, $fmt:expr) => ( write!($dst, concat!($fmt, "\n")) - ) + ); + ($dst:expr, $fmt:expr, $($arg:expr),*) => ( + write!($dst, concat!($fmt, "\n"), $($arg,)*) + ); } /// A utility macro for indicating unreachable code. diff --git a/src/libstd/rt/macros.rs b/src/libstd/rt/macros.rs index bbc96d0b19f..1e3ab6d34da 100644 --- a/src/libstd/rt/macros.rs +++ b/src/libstd/rt/macros.rs @@ -14,16 +14,24 @@ //! they aren't defined anywhere outside of the `rt` module. macro_rules! rterrln { - ($fmt:expr $($arg:tt)*) => ( { - ::rt::util::dumb_print(format_args!(concat!($fmt, "\n") $($arg)*)) + ($fmt:expr) => ( { + ::rt::util::dumb_print(format_args!(concat!($fmt, "\n"))) + } ); + ($fmt:expr, $($arg:expr),*) => ( { + ::rt::util::dumb_print(format_args!(concat!($fmt, "\n"), $($arg)*)) } ) } // Some basic logging. Enabled by passing `--cfg rtdebug` to the libstd build. macro_rules! rtdebug { - ($($arg:tt)*) => ( { + ($arg:expr) => ( { if cfg!(rtdebug) { - rterrln!($($arg)*) + rterrln!($arg) + } + } ); + ($str:expr, $($arg:expr),*) => ( { + if cfg!(rtdebug) { + rterrln!($str, $($arg)*) } }) } diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 9e1b18ad18a..6a064ec1313 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -422,7 +422,7 @@ fn is_in_follow(cx: &ExtCtxt, tok: &Token, frag: &str) -> bool { }, "stmt" | "expr" => { match *tok { - Comma | Semi => true, + FatArrow | Comma | Semi => true, _ => false } }, @@ -434,7 +434,7 @@ fn is_in_follow(cx: &ExtCtxt, tok: &Token, frag: &str) -> bool { }, "path" | "ty" => { match *tok { - Comma | RArrow | Colon | Eq | Gt => true, + Comma | FatArrow | Colon | Eq | Gt => true, Ident(i, _) if i.as_str() == "as" => true, _ => false } -- cgit 1.4.1-3-g733a5