Refactor the parser to consume token trees.

author: Jeffrey Seyfried <jeffrey.seyfried@gmail.com> 2017-01-13 04:49:20 +0000
committer: Jeffrey Seyfried <jeffrey.seyfried@gmail.com> 2017-01-17 08:17:26 +0000
commit: debcbf0b8e8fcf6f1d44e8f79cc06c0866d8d1dd (patch)
tree: f61d8ca01c5e888b1f18e25dcb516d80a54b875d /src/libsyntax
parent: de46b247585999ae70674f1fa0543d62f2889c7f (diff)
download: rust-debcbf0b8e8fcf6f1d44e8f79cc06c0866d8d1dd.tar.gz
rust-debcbf0b8e8fcf6f1d44e8f79cc06c0866d8d1dd.zip
8 files changed, 52 insertions, 197 deletions
diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs
index 68d261c64f8..edf74e1fe19 100644
--- a/src/libsyntax/ext/base.rs
+++ b/src/libsyntax/ext/base.rs
@@ -615,9 +615,7 @@ impl<'a> ExtCtxt<'a> {
 
     pub fn new_parser_from_tts(&self, tts: &[tokenstream::TokenTree])
         -> parser::Parser<'a> {
-        let mut parser = parse::tts_to_parser(self.parse_sess, tts.to_vec());
-        parser.allow_interpolated_tts = false; // FIXME(jseyfried) `quote!` can't handle these yet
-        parser
+        parse::tts_to_parser(self.parse_sess, tts.to_vec())
     }
     pub fn codemap(&self) -> &'a CodeMap { self.parse_sess.codemap() }
     pub fn parse_sess(&self) -> &'a parse::ParseSess { self.parse_sess }
diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index 2de31166070..46ffc93d2ee 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -82,7 +82,6 @@ use ast::Ident;
 use syntax_pos::{self, BytePos, mk_sp, Span};
 use codemap::Spanned;
 use errors::FatalError;
-use parse::lexer::*; //resolve bug?
 use parse::{Directory, ParseSess};
 use parse::parser::{PathStyle, Parser};
 use parse::token::{DocComment, MatchNt, SubstNt};
@@ -407,9 +406,9 @@ fn inner_parse_loop(cur_eis: &mut SmallVector<Box<MatcherPos>>,
     Success(())
 }
 
-pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree], directory: Option<Directory>)
+pub fn parse(sess: &ParseSess, tts: Vec<TokenTree>, ms: &[TokenTree], directory: Option<Directory>)
              -> NamedParseResult {
-    let mut parser = Parser::new(sess, Box::new(rdr), directory, true);
+    let mut parser = Parser::new(sess, tts, directory, true);
     let mut cur_eis = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo));
     let mut next_eis = Vec::new(); // or proceed normally
 
@@ -527,7 +526,7 @@ fn parse_nt<'a>(p: &mut Parser<'a>, sp: Span, name: &str) -> Nonterminal {
         "ident" => match p.token {
             token::Ident(sn) => {
                 p.bump();
-                token::NtIdent(Spanned::<Ident>{node: sn, span: p.span})
+                token::NtIdent(Spanned::<Ident>{node: sn, span: p.prev_span})
             }
             _ => {
                 let token_str = pprust::token_to_string(&p.token);
diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs
index 3abd24b50ba..585232c5462 100644
--- a/src/libsyntax/ext/tt/macro_rules.rs
+++ b/src/libsyntax/ext/tt/macro_rules.rs
@@ -16,8 +16,8 @@ use ext::expand::{Expansion, ExpansionKind};
 use ext::tt::macro_parser::{Success, Error, Failure};
 use ext::tt::macro_parser::{MatchedSeq, MatchedNonterminal};
 use ext::tt::macro_parser::{parse, parse_failure_msg};
+use ext::tt::transcribe::new_tt_reader;
 use parse::{Directory, ParseSess};
-use parse::lexer::new_tt_reader;
 use parse::parser::Parser;
 use parse::token::{self, NtTT, Token};
 use parse::token::Token::*;
@@ -113,13 +113,21 @@ fn generic_extension<'cx>(cx: &'cx ExtCtxt,
                     _ => cx.span_bug(sp, "malformed macro rhs"),
                 };
                 // rhs has holes ( `$id` and `$(...)` that need filled)
-                let trncbr =
+                let mut trncbr =
                     new_tt_reader(&cx.parse_sess.span_diagnostic, Some(named_matches), rhs);
+                let mut tts = Vec::new();
+                loop {
+                    let tok = trncbr.real_token();
+                    if tok.tok == token::Eof {
+                        break
+                    }
+                    tts.push(TokenTree::Token(tok.sp, tok.tok));
+                }
                 let directory = Directory {
                     path: cx.current_expansion.module.directory.clone(),
                     ownership: cx.current_expansion.directory_ownership,
                 };
-                let mut p = Parser::new(cx.parse_sess(), Box::new(trncbr), Some(directory), false);
+                let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), false);
                 p.root_module_name = cx.current_expansion.module.mod_path.last()
                     .map(|id| (*id.name.as_str()).to_owned());
 
@@ -187,10 +195,8 @@ pub fn compile(sess: &ParseSess, def: &ast::MacroDef) -> SyntaxExtension {
         })),
     ];
 
-    // Parse the macro_rules! invocation (`none` is for no interpolations):
-    let arg_reader = new_tt_reader(&sess.span_diagnostic, None, def.body.clone());
-
-    let argument_map = match parse(sess, arg_reader, &argument_gram, None) {
+    // Parse the macro_rules! invocation
+    let argument_map = match parse(sess, def.body.clone(), &argument_gram, None) {
         Success(m) => m,
         Failure(sp, tok) => {
             let s = parse_failure_msg(tok);
diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs
index 37e329e5d3b..82f1e183895 100644
--- a/src/libsyntax/ext/tt/transcribe.rs
+++ b/src/libsyntax/ext/tt/transcribe.rs
@@ -10,7 +10,7 @@
 use self::LockstepIterSize::*;
 
 use ast::Ident;
-use errors::{Handler, DiagnosticBuilder};
+use errors::Handler;
 use ext::tt::macro_parser::{NamedMatch, MatchedSeq, MatchedNonterminal};
 use parse::token::{self, MatchNt, SubstNt, Token, NtIdent};
 use parse::lexer::TokenAndSpan;
@@ -44,8 +44,12 @@ pub struct TtReader<'a> {
     /* cached: */
     pub cur_tok: Token,
     pub cur_span: Span,
-    /// Transform doc comments. Only useful in macro invocations
-    pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
+}
+
+impl<'a> TtReader<'a> {
+    pub fn real_token(&mut self) -> TokenAndSpan {
+        tt_next_token(self)
+    }
 }
 
 /// This can do Macro-By-Example transcription. On the other hand, if
@@ -76,7 +80,6 @@ pub fn new_tt_reader(sp_diag: &Handler,
         /* dummy values, never read: */
         cur_tok: token::Eof,
         cur_span: DUMMY_SP,
-        fatal_errs: Vec::new(),
     };
     tt_next_token(&mut r); /* get cur_tok and cur_span set up */
     r
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 6c6161998d7..12b9130c474 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -12,7 +12,6 @@ use ast::{self, Ident};
 use syntax_pos::{self, BytePos, CharPos, Pos, Span};
 use codemap::CodeMap;
 use errors::{FatalError, DiagnosticBuilder};
-use ext::tt::transcribe::tt_next_token;
 use parse::{token, ParseSess};
 use str::char_at;
 use symbol::{Symbol, keywords};
@@ -23,53 +22,10 @@ use std::char;
 use std::mem::replace;
 use std::rc::Rc;
 
-pub use ext::tt::transcribe::{TtReader, new_tt_reader};
-
 pub mod comments;
 mod tokentrees;
 mod unicode_chars;
 
-pub trait Reader {
-    fn is_eof(&self) -> bool;
-    fn try_next_token(&mut self) -> Result<TokenAndSpan, ()>;
-    fn next_token(&mut self) -> TokenAndSpan where Self: Sized {
-        let res = self.try_next_token();
-        self.unwrap_or_abort(res)
-    }
-    /// Report a fatal error with the current span.
-    fn fatal(&self, &str) -> FatalError;
-    /// Report a non-fatal error with the current span.
-    fn err(&self, &str);
-    fn emit_fatal_errors(&mut self);
-    fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan {
-        match res {
-            Ok(tok) => tok,
-            Err(_) => {
-                self.emit_fatal_errors();
-                panic!(FatalError);
-            }
-        }
-    }
-    fn peek(&self) -> TokenAndSpan;
-    /// Get a token the parser cares about.
-    fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> {
-        let mut t = self.try_next_token()?;
-        loop {
-            match t.tok {
-                token::Whitespace | token::Comment | token::Shebang(_) => {
-                    t = self.try_next_token()?;
-                }
-                _ => break,
-            }
-        }
-        Ok(t)
-    }
-    fn real_token(&mut self) -> TokenAndSpan {
-        let res = self.try_real_token();
-        self.unwrap_or_abort(res)
-    }
-}
-
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct TokenAndSpan {
     pub tok: token::Token,
@@ -182,36 +138,6 @@ impl<'a> StringReader<'a> {
     }
 }
 
-impl<'a> Reader for TtReader<'a> {
-    fn is_eof(&self) -> bool {
-        self.peek().tok == token::Eof
-    }
-    fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
-        assert!(self.fatal_errs.is_empty());
-        let r = tt_next_token(self);
-        debug!("TtReader: r={:?}", r);
-        Ok(r)
-    }
-    fn fatal(&self, m: &str) -> FatalError {
-        self.sp_diag.span_fatal(self.cur_span, m)
-    }
-    fn err(&self, m: &str) {
-        self.sp_diag.span_err(self.cur_span, m);
-    }
-    fn emit_fatal_errors(&mut self) {
-        for err in &mut self.fatal_errs {
-            err.emit();
-        }
-        self.fatal_errs.clear();
-    }
-    fn peek(&self) -> TokenAndSpan {
-        TokenAndSpan {
-            tok: self.cur_tok.clone(),
-            sp: self.cur_span,
-        }
-    }
-}
-
 impl<'a> StringReader<'a> {
     /// For comments.rs, which hackily pokes into next_pos and ch
     pub fn new_raw<'b>(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self {
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 500e8285b4c..0937ef15b4d 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -45,7 +45,7 @@ pub mod obsolete;
 
 /// Info about a parsing session.
 pub struct ParseSess {
-    pub span_diagnostic: Handler, // better be the same as the one in the reader!
+    pub span_diagnostic: Handler,
     pub unstable_features: UnstableFeatures,
     pub config: CrateConfig,
     /// Used to determine and report recursive mod inclusions
@@ -227,8 +227,7 @@ pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>) -> Vec<tokenstream
 
 /// Given tts and the ParseSess, produce a parser
 pub fn tts_to_parser<'a>(sess: &'a ParseSess, tts: Vec<tokenstream::TokenTree>) -> Parser<'a> {
-    let trdr = lexer::new_tt_reader(&sess.span_diagnostic, None, tts);
-    let mut p = Parser::new(sess, Box::new(trdr), None, false);
+    let mut p = Parser::new(sess, tts, None, false);
     p.check_unknown_macro_variable();
     p
 }
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 9ba6d4d17f7..608f8688e88 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -46,7 +46,7 @@ use ext::tt::macro_parser;
 use parse;
 use parse::classify;
 use parse::common::SeqSep;
-use parse::lexer::{Reader, TokenAndSpan};
+use parse::lexer::TokenAndSpan;
 use parse::obsolete::ObsoleteSyntax;
 use parse::token::{self, MatchNt, SubstNt};
 use parse::{new_sub_parser_from_file, ParseSess, Directory, DirectoryOwnership};
@@ -188,14 +188,11 @@ pub struct Parser<'a> {
     pub restrictions: Restrictions,
     pub quote_depth: usize, // not (yet) related to the quasiquoter
     parsing_token_tree: bool,
-    pub reader: Box<Reader+'a>,
     /// The set of seen errors about obsolete syntax. Used to suppress
     /// extra detail when the same error is seen twice
     pub obsolete_set: HashSet<ObsoleteSyntax>,
     /// Used to determine the path to externally loaded source files
     pub directory: Directory,
-    /// Stack of open delimiters and their spans. Used for error message.
-    pub open_braces: Vec<(token::DelimToken, Span)>,
     /// Name of the root module this parser originated from. If `None`, then the
     /// name is not known. This does not change while the parser is descending
     /// into modules, and sub-parsers have new values for this name.
@@ -203,7 +200,6 @@ pub struct Parser<'a> {
     pub expected_tokens: Vec<TokenType>,
     pub tts: Vec<(TokenTree, usize)>,
     pub desugar_doc_comments: bool,
-    pub allow_interpolated_tts: bool,
 }
 
 #[derive(PartialEq, Eq, Clone)]
@@ -269,12 +265,17 @@ impl From<P<Expr>> for LhsExpr {
 
 impl<'a> Parser<'a> {
     pub fn new(sess: &'a ParseSess,
-               rdr: Box<Reader+'a>,
+               tokens: Vec<TokenTree>,
                directory: Option<Directory>,
                desugar_doc_comments: bool)
                -> Self {
+        let tt = TokenTree::Delimited(syntax_pos::DUMMY_SP, Rc::new(Delimited {
+            delim: token::NoDelim,
+            open_span: syntax_pos::DUMMY_SP,
+            tts: tokens,
+            close_span: syntax_pos::DUMMY_SP,
+        }));
         let mut parser = Parser {
-            reader: rdr,
             sess: sess,
             token: token::Underscore,
             span: syntax_pos::DUMMY_SP,
@@ -286,12 +287,10 @@ impl<'a> Parser<'a> {
             parsing_token_tree: false,
             obsolete_set: HashSet::new(),
             directory: Directory { path: PathBuf::new(), ownership: DirectoryOwnership::Owned },
-            open_braces: Vec::new(),
             root_module_name: None,
             expected_tokens: Vec::new(),
-            tts: Vec::new(),
+            tts: if tt.len() > 0 { vec![(tt, 0)] } else { Vec::new() },
             desugar_doc_comments: desugar_doc_comments,
-            allow_interpolated_tts: true,
         };
 
         let tok = parser.next_tok();
@@ -320,7 +319,7 @@ impl<'a> Parser<'a> {
                     continue
                 }
             } else {
-                self.reader.real_token()
+                TokenAndSpan { tok: token::Eof, sp: self.span }
             };
 
             loop {
@@ -2688,94 +2687,28 @@ impl<'a> Parser<'a> {
         // whether something will be a nonterminal or a seq
         // yet.
         match self.token {
-            token::Eof => {
-                let mut err: DiagnosticBuilder<'a> =
-                    self.diagnostic().struct_span_err(self.span,
-                                                      "this file contains an un-closed delimiter");
-                for &(_, sp) in &self.open_braces {
-                    err.span_help(sp, "did you mean to close this delimiter?");
-                }
-
-                Err(err)
-            },
             token::OpenDelim(delim) => {
-                if self.tts.last().map(|&(_, i)| i == 1).unwrap_or(false) {
+                if self.quote_depth == 0 && self.tts.last().map(|&(_, i)| i == 1).unwrap_or(false) {
                     let tt = self.tts.pop().unwrap().0;
                     self.bump();
-                    return Ok(if self.allow_interpolated_tts {
-                        // avoid needlessly reparsing token trees in recursive macro expansions
-                        TokenTree::Token(tt.span(), token::Interpolated(Rc::new(token::NtTT(tt))))
-                    } else {
-                        tt
-                    });
+                    return Ok(tt);
                 }
 
                 let parsing_token_tree = ::std::mem::replace(&mut self.parsing_token_tree, true);
-                // The span for beginning of the delimited section
-                let pre_span = self.span;
-
-                // Parse the open delimiter.
-                self.open_braces.push((delim, self.span));
                 let open_span = self.span;
                 self.bump();
-
-                // Parse the token trees within the delimiters.
-                // We stop at any delimiter so we can try to recover if the user
-                // uses an incorrect delimiter.
                 let tts = self.parse_seq_to_before_tokens(&[&token::CloseDelim(token::Brace),
                                                             &token::CloseDelim(token::Paren),
                                                             &token::CloseDelim(token::Bracket)],
                                                           SeqSep::none(),
                                                           |p| p.parse_token_tree(),
                                                           |mut e| e.emit());
+                self.parsing_token_tree = parsing_token_tree;
 
                 let close_span = self.span;
-                // Expand to cover the entire delimited token tree
-                let span = Span { hi: close_span.hi, ..pre_span };
-
-                match self.token {
-                    // Correct delimiter.
-                    token::CloseDelim(d) if d == delim => {
-                        self.open_braces.pop().unwrap();
-
-                        // Parse the close delimiter.
-                        self.bump();
-                    }
-                    // Incorrect delimiter.
-                    token::CloseDelim(other) => {
-                        let token_str = self.this_token_to_string();
-                        let mut err = self.diagnostic().struct_span_err(self.span,
-                            &format!("incorrect close delimiter: `{}`", token_str));
-                        // This is a conservative error: only report the last unclosed delimiter.
-                        // The previous unclosed delimiters could actually be closed! The parser
-                        // just hasn't gotten to them yet.
-                        if let Some(&(_, sp)) = self.open_braces.last() {
-                            err.span_note(sp, "unclosed delimiter");
-                        };
-                        err.emit();
-
-                        self.open_braces.pop().unwrap();
-
-                        // If the incorrect delimiter matches an earlier opening
-                        // delimiter, then don't consume it (it can be used to
-                        // close the earlier one). Otherwise, consume it.
-                        // E.g., we try to recover from:
-                        // fn foo() {
-                        //     bar(baz(
-                        // }  // Incorrect delimiter but matches the earlier `{`
-                        if !self.open_braces.iter().any(|&(b, _)| b == other) {
-                            self.bump();
-                        }
-                    }
-                    token::Eof => {
-                        // Silently recover, the EOF token will be seen again
-                        // and an error emitted then. Thus we don't pop from
-                        // self.open_braces here.
-                    },
-                    _ => {}
-                }
+                self.bump();
 
-                self.parsing_token_tree = parsing_token_tree;
+                let span = Span { lo: open_span.lo, ..close_span };
                 Ok(TokenTree::Delimited(span, Rc::new(Delimited {
                     delim: delim,
                     open_span: open_span,
@@ -2783,21 +2716,9 @@ impl<'a> Parser<'a> {
                     close_span: close_span,
                 })))
             },
-            token::CloseDelim(_) => {
-                // An unexpected closing delimiter (i.e., there is no
-                // matching opening delimiter).
-                let token_str = self.this_token_to_string();
-                let err = self.diagnostic().struct_span_err(self.span,
-                    &format!("unexpected close delimiter: `{}`", token_str));
-                Err(err)
-            },
-            /* we ought to allow different depths of unquotation */
-            token::Dollar | token::SubstNt(..) if self.quote_depth > 0 => {
-                self.parse_unquoted()
-            }
-            _ => {
-                Ok(TokenTree::Token(self.span, self.bump_and_get()))
-            }
+            token::CloseDelim(_) | token::Eof => unreachable!(),
+            token::Dollar | token::SubstNt(..) if self.quote_depth > 0 => self.parse_unquoted(),
+            _ => Ok(TokenTree::Token(self.span, self.bump_and_get())),
         }
     }
 
diff --git a/src/libsyntax/tokenstream.rs b/src/libsyntax/tokenstream.rs
index e352e7853c7..ab5dc8181e0 100644
--- a/src/libsyntax/tokenstream.rs
+++ b/src/libsyntax/tokenstream.rs
@@ -30,7 +30,6 @@ use codemap::{Spanned, combine_spans};
 use ext::base;
 use ext::tt::macro_parser;
 use parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration};
-use parse::lexer;
 use parse::{self, Directory};
 use parse::token::{self, Token, Lit, Nonterminal};
 use print::pprust;
@@ -139,7 +138,10 @@ impl TokenTree {
                 if let Nonterminal::NtTT(..) = **nt { 1 } else { 0 }
             },
             TokenTree::Token(_, token::MatchNt(..)) => 3,
-            TokenTree::Delimited(_, ref delimed) => delimed.tts.len() + 2,
+            TokenTree::Delimited(_, ref delimed) => match delimed.delim {
+                token::NoDelim => delimed.tts.len(),
+                _ => delimed.tts.len() + 2,
+            },
             TokenTree::Sequence(_, ref seq) => seq.tts.len(),
             TokenTree::Token(..) => 0,
         }
@@ -181,6 +183,9 @@ impl TokenTree {
                     close_span: sp,
                 }))
             }
+            (&TokenTree::Delimited(_, ref delimed), _) if delimed.delim == token::NoDelim => {
+                delimed.tts[index].clone()
+            }
             (&TokenTree::Delimited(_, ref delimed), _) => {
                 if index == 0 {
                     return delimed.open_tt();
@@ -215,14 +220,12 @@ impl TokenTree {
                  mtch: &[TokenTree],
                  tts: &[TokenTree])
                  -> macro_parser::NamedParseResult {
-        let diag = &cx.parse_sess().span_diagnostic;
         // `None` is because we're not interpolating
-        let arg_rdr = lexer::new_tt_reader(diag, None, tts.iter().cloned().collect());
         let directory = Directory {
             path: cx.current_expansion.module.directory.clone(),
             ownership: cx.current_expansion.directory_ownership,
         };
-        macro_parser::parse(cx.parse_sess(), arg_rdr, mtch, Some(directory))
+        macro_parser::parse(cx.parse_sess(), tts.iter().cloned().collect(), mtch, Some(directory))
     }
 
     /// Check if this TokenTree is equal to the other, regardless of span information.
author	Jeffrey Seyfried <jeffrey.seyfried@gmail.com>	2017-01-13 04:49:20 +0000
committer	Jeffrey Seyfried <jeffrey.seyfried@gmail.com>	2017-01-17 08:17:26 +0000
commit	debcbf0b8e8fcf6f1d44e8f79cc06c0866d8d1dd (patch)
tree	f61d8ca01c5e888b1f18e25dcb516d80a54b875d /src/libsyntax
parent	de46b247585999ae70674f1fa0543d62f2889c7f (diff)
download	rust-debcbf0b8e8fcf6f1d44e8f79cc06c0866d8d1dd.tar.gz rust-debcbf0b8e8fcf6f1d44e8f79cc06c0866d8d1dd.zip