Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer/comments.rs | 13 |
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 216 |
| -rw-r--r-- | src/libsyntax/parse/lexer/tokentrees.rs | 138 |
| -rw-r--r-- | src/libsyntax/parse/lexer/unicode_chars.rs | 6 |
| -rw-r--r-- | src/libsyntax/parse/mod.rs | 16 |
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 195 |
6 files changed, 276 insertions(+), 308 deletions(-)
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs index ba83a55ea79..c97b8ddf919 100644 --- a/src/libsyntax/parse/lexer/comments.rs +++ b/src/libsyntax/parse/lexer/comments.rs @@ -13,11 +13,8 @@ pub use self::CommentStyle::*; use ast; use codemap::CodeMap; use syntax_pos::{BytePos, CharPos, Pos}; -use errors; -use parse::lexer::is_block_doc_comment; -use parse::lexer::{StringReader, TokenAndSpan}; -use parse::lexer::{is_pattern_whitespace, Reader}; -use parse::lexer; +use parse::lexer::{is_block_doc_comment, is_pattern_whitespace}; +use parse::lexer::{self, ParseSess, StringReader, TokenAndSpan}; use print::pprust; use str::char_at; @@ -346,16 +343,14 @@ pub struct Literal { // it appears this function is called only from pprust... that's // probably not a good thing. -pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler, - path: String, - srdr: &mut Read) +pub fn gather_comments_and_literals(sess: &ParseSess, path: String, srdr: &mut Read) -> (Vec<Comment>, Vec<Literal>) { let mut src = Vec::new(); srdr.read_to_end(&mut src).unwrap(); let src = String::from_utf8(src).unwrap(); let cm = CodeMap::new(); let filemap = cm.new_filemap(path, None, src); - let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap); + let mut rdr = lexer::StringReader::new_raw(sess, filemap); let mut comments: Vec<Comment> = Vec::new(); let mut literals: Vec<Literal> = Vec::new(); diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 818742e4492..6bc15115b09 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -11,9 +11,8 @@ use ast::{self, Ident}; use syntax_pos::{self, BytePos, CharPos, Pos, Span}; use codemap::CodeMap; -use errors::{FatalError, Handler, DiagnosticBuilder}; -use ext::tt::transcribe::tt_next_token; -use parse::token; +use errors::{FatalError, DiagnosticBuilder}; +use parse::{token, ParseSess}; use str::char_at; use symbol::{Symbol, keywords}; use std_unicode::property::Pattern_White_Space; @@ -23,52 +22,10 @@ use std::char; use std::mem::replace; use std::rc::Rc; -pub use ext::tt::transcribe::{TtReader, new_tt_reader}; - pub mod comments; +mod tokentrees; mod unicode_chars; -pub trait Reader { - fn is_eof(&self) -> bool; - fn try_next_token(&mut self) -> Result<TokenAndSpan, ()>; - fn next_token(&mut self) -> TokenAndSpan where Self: Sized { - let res = self.try_next_token(); - self.unwrap_or_abort(res) - } - /// Report a fatal error with the current span. - fn fatal(&self, &str) -> FatalError; - /// Report a non-fatal error with the current span. - fn err(&self, &str); - fn emit_fatal_errors(&mut self); - fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan { - match res { - Ok(tok) => tok, - Err(_) => { - self.emit_fatal_errors(); - panic!(FatalError); - } - } - } - fn peek(&self) -> TokenAndSpan; - /// Get a token the parser cares about. 
- fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> { - let mut t = self.try_next_token()?; - loop { - match t.tok { - token::Whitespace | token::Comment | token::Shebang(_) => { - t = self.try_next_token()?; - } - _ => break, - } - } - Ok(t) - } - fn real_token(&mut self) -> TokenAndSpan { - let res = self.try_real_token(); - self.unwrap_or_abort(res) - } -} - #[derive(Clone, PartialEq, Eq, Debug)] pub struct TokenAndSpan { pub tok: token::Token, @@ -82,7 +39,7 @@ impl Default for TokenAndSpan { } pub struct StringReader<'a> { - pub span_diagnostic: &'a Handler, + pub sess: &'a ParseSess, /// The absolute offset within the codemap of the next character to read pub next_pos: BytePos, /// The absolute offset within the codemap of the current character @@ -105,9 +62,44 @@ pub struct StringReader<'a> { // cache a direct reference to the source text, so that we don't have to // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time. source_text: Rc<String>, + /// Stack of open delimiters and their spans. Used for error message. + token: token::Token, + span: Span, + open_braces: Vec<(token::DelimToken, Span)>, } -impl<'a> Reader for StringReader<'a> { +impl<'a> StringReader<'a> { + fn next_token(&mut self) -> TokenAndSpan where Self: Sized { + let res = self.try_next_token(); + self.unwrap_or_abort(res) + } + fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan { + match res { + Ok(tok) => tok, + Err(_) => { + self.emit_fatal_errors(); + panic!(FatalError); + } + } + } + fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> { + let mut t = self.try_next_token()?; + loop { + match t.tok { + token::Whitespace | token::Comment | token::Shebang(_) => { + t = self.try_next_token()?; + } + _ => break, + } + } + self.token = t.tok.clone(); + self.span = t.sp; + Ok(t) + } + pub fn real_token(&mut self) -> TokenAndSpan { + let res = self.try_real_token(); + self.unwrap_or_abort(res) + } fn is_eof(&self) -> bool { if self.ch.is_none() { return true; @@ -119,7 +111,7 @@ impl<'a> Reader for StringReader<'a> { } } /// Return the next token. EFFECT: advances the string_reader. - fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> { + pub fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> { assert!(self.fatal_errs.is_empty()); let ret_val = TokenAndSpan { tok: replace(&mut self.peek_tok, token::Underscore), @@ -131,16 +123,13 @@ impl<'a> Reader for StringReader<'a> { fn fatal(&self, m: &str) -> FatalError { self.fatal_span(self.peek_span, m) } - fn err(&self, m: &str) { - self.err_span(self.peek_span, m) - } - fn emit_fatal_errors(&mut self) { + pub fn emit_fatal_errors(&mut self) { for err in &mut self.fatal_errs { err.emit(); } self.fatal_errs.clear(); } - fn peek(&self) -> TokenAndSpan { + pub fn peek(&self) -> TokenAndSpan { // FIXME(pcwalton): Bad copy! 
TokenAndSpan { tok: self.peek_tok.clone(), @@ -149,59 +138,24 @@ impl<'a> Reader for StringReader<'a> { } } -impl<'a> Reader for TtReader<'a> { - fn is_eof(&self) -> bool { - self.peek().tok == token::Eof - } - fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> { - assert!(self.fatal_errs.is_empty()); - let r = tt_next_token(self); - debug!("TtReader: r={:?}", r); - Ok(r) - } - fn fatal(&self, m: &str) -> FatalError { - self.sp_diag.span_fatal(self.cur_span, m) - } - fn err(&self, m: &str) { - self.sp_diag.span_err(self.cur_span, m); - } - fn emit_fatal_errors(&mut self) { - for err in &mut self.fatal_errs { - err.emit(); - } - self.fatal_errs.clear(); - } - fn peek(&self) -> TokenAndSpan { - TokenAndSpan { - tok: self.cur_tok.clone(), - sp: self.cur_span, - } - } -} - impl<'a> StringReader<'a> { /// For comments.rs, which hackily pokes into next_pos and ch - pub fn new_raw<'b>(span_diagnostic: &'b Handler, - filemap: Rc<syntax_pos::FileMap>) - -> StringReader<'b> { - let mut sr = StringReader::new_raw_internal(span_diagnostic, filemap); + pub fn new_raw<'b>(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self { + let mut sr = StringReader::new_raw_internal(sess, filemap); sr.bump(); sr } - fn new_raw_internal<'b>(span_diagnostic: &'b Handler, - filemap: Rc<syntax_pos::FileMap>) - -> StringReader<'b> { + fn new_raw_internal(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self { if filemap.src.is_none() { - span_diagnostic.bug(&format!("Cannot lex filemap \ - without source: {}", - filemap.name)[..]); + sess.span_diagnostic.bug(&format!("Cannot lex filemap without source: {}", + filemap.name)); } let source_text = (*filemap.src.as_ref().unwrap()).clone(); StringReader { - span_diagnostic: span_diagnostic, + sess: sess, next_pos: filemap.start_pos, pos: filemap.start_pos, col: CharPos(0), @@ -214,13 +168,14 @@ impl<'a> StringReader<'a> { peek_span: syntax_pos::DUMMY_SP, source_text: source_text, fatal_errs: Vec::new(), + token: token::Eof, + span: syntax_pos::DUMMY_SP, + open_braces: Vec::new(), } } - pub fn new<'b>(span_diagnostic: &'b Handler, - filemap: Rc<syntax_pos::FileMap>) - -> StringReader<'b> { - let mut sr = StringReader::new_raw(span_diagnostic, filemap); + pub fn new(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self { + let mut sr = StringReader::new_raw(sess, filemap); if let Err(_) = sr.advance_token() { sr.emit_fatal_errors(); panic!(FatalError); @@ -234,12 +189,12 @@ impl<'a> StringReader<'a> { /// Report a fatal lexical error with a given span. pub fn fatal_span(&self, sp: Span, m: &str) -> FatalError { - self.span_diagnostic.span_fatal(sp, m) + self.sess.span_diagnostic.span_fatal(sp, m) } /// Report a lexical error with a given span. 
pub fn err_span(&self, sp: Span, m: &str) { - self.span_diagnostic.span_err(sp, m) + self.sess.span_diagnostic.span_err(sp, m) } @@ -274,7 +229,7 @@ impl<'a> StringReader<'a> { for c in c.escape_default() { m.push(c) } - self.span_diagnostic.struct_span_fatal(syntax_pos::mk_sp(from_pos, to_pos), &m[..]) + self.sess.span_diagnostic.struct_span_fatal(syntax_pos::mk_sp(from_pos, to_pos), &m[..]) } /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an @@ -298,7 +253,7 @@ impl<'a> StringReader<'a> { for c in c.escape_default() { m.push(c) } - self.span_diagnostic.struct_span_err(syntax_pos::mk_sp(from_pos, to_pos), &m[..]) + self.sess.span_diagnostic.struct_span_err(syntax_pos::mk_sp(from_pos, to_pos), &m[..]) } /// Report a lexical error spanning [`from_pos`, `to_pos`), appending the @@ -503,9 +458,8 @@ impl<'a> StringReader<'a> { fn scan_comment(&mut self) -> Option<TokenAndSpan> { if let Some(c) = self.ch { if c.is_whitespace() { - self.span_diagnostic.span_err(syntax_pos::mk_sp(self.pos, self.pos), - "called consume_any_line_comment, but there \ - was whitespace"); + let msg = "called consume_any_line_comment, but there was whitespace"; + self.sess.span_diagnostic.span_err(syntax_pos::mk_sp(self.pos, self.pos), msg); } } @@ -875,7 +829,7 @@ impl<'a> StringReader<'a> { self.scan_unicode_escape(delim) && !ascii_only } else { let span = syntax_pos::mk_sp(start, self.pos); - self.span_diagnostic + self.sess.span_diagnostic .struct_span_err(span, "incorrect unicode escape sequence") .span_help(span, "format of unicode escape sequences is \ @@ -1701,35 +1655,41 @@ fn ident_continue(c: Option<char>) -> bool { mod tests { use super::*; - use ast::Ident; + use ast::{Ident, CrateConfig}; use symbol::Symbol; use syntax_pos::{BytePos, Span, NO_EXPANSION}; use codemap::CodeMap; use errors; + use feature_gate::UnstableFeatures; use parse::token; + use std::cell::RefCell; use std::io; use std::rc::Rc; - fn mk_sh(cm: Rc<CodeMap>) -> errors::Handler { - // FIXME (#22405): Replace `Box::new` with `box` here when/if possible. 
- let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()), - Some(cm)); - errors::Handler::with_emitter(true, false, Box::new(emitter)) + fn mk_sess(cm: Rc<CodeMap>) -> ParseSess { + let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()), Some(cm.clone())); + ParseSess { + span_diagnostic: errors::Handler::with_emitter(true, false, Box::new(emitter)), + unstable_features: UnstableFeatures::from_environment(), + config: CrateConfig::new(), + included_mod_stack: RefCell::new(Vec::new()), + code_map: cm, + } } // open a string reader for the given string fn setup<'a>(cm: &CodeMap, - span_handler: &'a errors::Handler, + sess: &'a ParseSess, teststr: String) -> StringReader<'a> { let fm = cm.new_filemap("zebra.rs".to_string(), None, teststr); - StringReader::new(span_handler, fm) + StringReader::new(sess, fm) } #[test] fn t1() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); let mut string_reader = setup(&cm, &sh, "/* my source file */ fn main() { println!(\"zebra\"); }\n" @@ -1781,7 +1741,7 @@ mod tests { #[test] fn doublecolonparsing() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); check_tokenization(setup(&cm, &sh, "a b".to_string()), vec![mk_ident("a"), token::Whitespace, mk_ident("b")]); } @@ -1789,7 +1749,7 @@ mod tests { #[test] fn dcparsing_2() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); check_tokenization(setup(&cm, &sh, "a::b".to_string()), vec![mk_ident("a"), token::ModSep, mk_ident("b")]); } @@ -1797,7 +1757,7 @@ mod tests { #[test] fn dcparsing_3() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); check_tokenization(setup(&cm, &sh, "a ::b".to_string()), vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]); } @@ -1805,7 +1765,7 @@ mod tests { #[test] fn dcparsing_4() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); check_tokenization(setup(&cm, &sh, "a:: b".to_string()), vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]); } @@ -1813,7 +1773,7 @@ mod tests { #[test] fn character_a() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); assert_eq!(setup(&cm, &sh, "'a'".to_string()).next_token().tok, token::Literal(token::Char(Symbol::intern("a")), None)); } @@ -1821,7 +1781,7 @@ mod tests { #[test] fn character_space() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); assert_eq!(setup(&cm, &sh, "' '".to_string()).next_token().tok, token::Literal(token::Char(Symbol::intern(" ")), None)); } @@ -1829,7 +1789,7 @@ mod tests { #[test] fn character_escaped() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); assert_eq!(setup(&cm, &sh, "'\\n'".to_string()).next_token().tok, token::Literal(token::Char(Symbol::intern("\\n")), None)); } @@ -1837,7 +1797,7 @@ mod tests { #[test] fn lifetime_name() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); assert_eq!(setup(&cm, &sh, "'abc".to_string()).next_token().tok, token::Lifetime(Ident::from_str("'abc"))); } @@ -1845,7 +1805,7 @@ mod tests { #[test] fn raw_string() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); assert_eq!(setup(&cm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()) .next_token() .tok, @@ 
-1855,7 +1815,7 @@ mod tests { #[test] fn literal_suffixes() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); macro_rules! test { ($input: expr, $tok_type: ident, $tok_contents: expr) => {{ assert_eq!(setup(&cm, &sh, format!("{}suffix", $input)).next_token().tok, @@ -1899,7 +1859,7 @@ mod tests { #[test] fn nested_block_comments() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); let mut lexer = setup(&cm, &sh, "/* /* */ */'a'".to_string()); match lexer.next_token().tok { token::Comment => {} @@ -1912,7 +1872,7 @@ mod tests { #[test] fn crlf_comments() { let cm = Rc::new(CodeMap::new()); - let sh = mk_sh(cm.clone()); + let sh = mk_sess(cm.clone()); let mut lexer = setup(&cm, &sh, "// test\r\n/// test\r\n".to_string()); let comment = lexer.next_token(); assert_eq!(comment.tok, token::Comment); diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs new file mode 100644 index 00000000000..7b6f00e0e82 --- /dev/null +++ b/src/libsyntax/parse/lexer/tokentrees.rs @@ -0,0 +1,138 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use print::pprust::token_to_string; +use parse::lexer::StringReader; +use parse::{token, PResult}; +use syntax_pos::Span; +use tokenstream::{Delimited, TokenTree}; + +use std::rc::Rc; + +impl<'a> StringReader<'a> { + // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`. + pub fn parse_all_token_trees(&mut self) -> PResult<'a, Vec<TokenTree>> { + let mut tts = Vec::new(); + while self.token != token::Eof { + tts.push(self.parse_token_tree()?); + } + Ok(tts) + } + + // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`. + fn parse_token_trees_until_close_delim(&mut self) -> Vec<TokenTree> { + let mut tts = vec![]; + loop { + if let token::CloseDelim(..) = self.token { + return tts; + } + match self.parse_token_tree() { + Ok(tt) => tts.push(tt), + Err(mut e) => { + e.emit(); + return tts; + } + } + } + } + + fn parse_token_tree(&mut self) -> PResult<'a, TokenTree> { + match self.token { + token::Eof => { + let msg = "this file contains an un-closed delimiter"; + let mut err = self.sess.span_diagnostic.struct_span_err(self.span, msg); + for &(_, sp) in &self.open_braces { + err.span_help(sp, "did you mean to close this delimiter?"); + } + Err(err) + }, + token::OpenDelim(delim) => { + // The span for beginning of the delimited section + let pre_span = self.span; + + // Parse the open delimiter. + self.open_braces.push((delim, self.span)); + let open_span = self.span; + self.real_token(); + + // Parse the token trees within the delimiters. + // We stop at any delimiter so we can try to recover if the user + // uses an incorrect delimiter. + let tts = self.parse_token_trees_until_close_delim(); + + let close_span = self.span; + // Expand to cover the entire delimited token tree + let span = Span { hi: close_span.hi, ..pre_span }; + + match self.token { + // Correct delimiter. 
+ token::CloseDelim(d) if d == delim => { + self.open_braces.pop().unwrap(); + + // Parse the close delimiter. + self.real_token(); + } + // Incorrect delimiter. + token::CloseDelim(other) => { + let token_str = token_to_string(&self.token); + let msg = format!("incorrect close delimiter: `{}`", token_str); + let mut err = self.sess.span_diagnostic.struct_span_err(self.span, &msg); + // This is a conservative error: only report the last unclosed delimiter. + // The previous unclosed delimiters could actually be closed! The parser + // just hasn't gotten to them yet. + if let Some(&(_, sp)) = self.open_braces.last() { + err.span_note(sp, "unclosed delimiter"); + }; + err.emit(); + + self.open_braces.pop().unwrap(); + + // If the incorrect delimiter matches an earlier opening + // delimiter, then don't consume it (it can be used to + // close the earlier one). Otherwise, consume it. + // E.g., we try to recover from: + // fn foo() { + // bar(baz( + // } // Incorrect delimiter but matches the earlier `{` + if !self.open_braces.iter().any(|&(b, _)| b == other) { + self.real_token(); + } + } + token::Eof => { + // Silently recover, the EOF token will be seen again + // and an error emitted then. Thus we don't pop from + // self.open_braces here. + }, + _ => {} + } + + Ok(TokenTree::Delimited(span, Rc::new(Delimited { + delim: delim, + open_span: open_span, + tts: tts, + close_span: close_span, + }))) + }, + token::CloseDelim(_) => { + // An unexpected closing delimiter (i.e., there is no + // matching opening delimiter). + let token_str = token_to_string(&self.token); + let msg = format!("unexpected close delimiter: `{}`", token_str); + let err = self.sess.span_diagnostic.struct_span_err(self.span, &msg); + Err(err) + }, + _ => { + let tt = TokenTree::Token(self.span, self.token.clone()); + self.real_token(); + Ok(tt) + } + } + } +} diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index 1e08b20b7e1..6da3e5de75c 100644 --- a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -243,10 +243,8 @@ pub fn check_for_substitution<'a>(reader: &StringReader<'a>, err.span_help(span, &msg); }, None => { - reader - .span_diagnostic - .span_bug_no_panic(span, - &format!("substitution character not found for '{}'", ch)); + let msg = format!("substitution character not found for '{}'", ch); + reader.sess.span_diagnostic.span_bug_no_panic(span, &msg); } } }); diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 1a1950dc45c..08f5df4515b 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -45,7 +45,7 @@ pub mod obsolete; /// Info about a parsing session. pub struct ParseSess { - pub span_diagnostic: Handler, // better be the same as the one in the reader! + pub span_diagnostic: Handler, pub unstable_features: UnstableFeatures, pub config: CrateConfig, /// Used to determine and report recursive mod inclusions @@ -219,19 +219,15 @@ fn file_to_filemap(sess: &ParseSess, path: &Path, spanopt: Option<Span>) } /// Given a filemap, produce a sequence of token-trees -pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>) - -> Vec<tokenstream::TokenTree> { - // it appears to me that the cfg doesn't matter here... indeed, - // parsing tt's probably shouldn't require a parser at all. 
- let srdr = lexer::StringReader::new(&sess.span_diagnostic, filemap); - let mut p1 = Parser::new(sess, Box::new(srdr), None, false); - panictry!(p1.parse_all_token_trees()) +pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>) -> Vec<tokenstream::TokenTree> { + let mut srdr = lexer::StringReader::new(sess, filemap); + srdr.real_token(); + panictry!(srdr.parse_all_token_trees()) } /// Given tts and the ParseSess, produce a parser pub fn tts_to_parser<'a>(sess: &'a ParseSess, tts: Vec<tokenstream::TokenTree>) -> Parser<'a> { - let trdr = lexer::new_tt_reader(&sess.span_diagnostic, None, tts); - let mut p = Parser::new(sess, Box::new(trdr), None, false); + let mut p = Parser::new(sess, tts, None, false); p.check_unknown_macro_variable(); p } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index d1a683b0bd5..fd6abc58b63 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -46,7 +46,7 @@ use ext::tt::macro_parser; use parse; use parse::classify; use parse::common::SeqSep; -use parse::lexer::{Reader, TokenAndSpan}; +use parse::lexer::TokenAndSpan; use parse::obsolete::ObsoleteSyntax; use parse::token::{self, MatchNt, SubstNt}; use parse::{new_sub_parser_from_file, ParseSess, Directory, DirectoryOwnership}; @@ -156,22 +156,6 @@ enum PrevTokenKind { Other, } -// Simple circular buffer used for keeping few next tokens. -#[derive(Default)] -struct LookaheadBuffer { - buffer: [TokenAndSpan; LOOKAHEAD_BUFFER_CAPACITY], - start: usize, - end: usize, -} - -const LOOKAHEAD_BUFFER_CAPACITY: usize = 8; - -impl LookaheadBuffer { - fn len(&self) -> usize { - (LOOKAHEAD_BUFFER_CAPACITY + self.end - self.start) % LOOKAHEAD_BUFFER_CAPACITY - } -} - /* ident is handled by common.rs */ pub struct Parser<'a> { @@ -184,19 +168,14 @@ pub struct Parser<'a> { pub prev_span: Span, /// the previous token kind prev_token_kind: PrevTokenKind, - lookahead_buffer: LookaheadBuffer, - pub tokens_consumed: usize, pub restrictions: Restrictions, pub quote_depth: usize, // not (yet) related to the quasiquoter parsing_token_tree: bool, - pub reader: Box<Reader+'a>, /// The set of seen errors about obsolete syntax. Used to suppress /// extra detail when the same error is seen twice pub obsolete_set: HashSet<ObsoleteSyntax>, /// Used to determine the path to externally loaded source files pub directory: Directory, - /// Stack of open delimiters and their spans. Used for error message. - pub open_braces: Vec<(token::DelimToken, Span)>, /// Name of the root module this parser originated from. If `None`, then the /// name is not known. This does not change while the parser is descending /// into modules, and sub-parsers have new values for this name. 
@@ -204,7 +183,6 @@ pub struct Parser<'a> { pub expected_tokens: Vec<TokenType>, pub tts: Vec<(TokenTree, usize)>, pub desugar_doc_comments: bool, - pub allow_interpolated_tts: bool, } #[derive(PartialEq, Eq, Clone)] @@ -270,30 +248,31 @@ impl From<P<Expr>> for LhsExpr { impl<'a> Parser<'a> { pub fn new(sess: &'a ParseSess, - rdr: Box<Reader+'a>, + tokens: Vec<TokenTree>, directory: Option<Directory>, desugar_doc_comments: bool) -> Self { + let tt = TokenTree::Delimited(syntax_pos::DUMMY_SP, Rc::new(Delimited { + delim: token::NoDelim, + open_span: syntax_pos::DUMMY_SP, + tts: tokens, + close_span: syntax_pos::DUMMY_SP, + })); let mut parser = Parser { - reader: rdr, sess: sess, token: token::Underscore, span: syntax_pos::DUMMY_SP, prev_span: syntax_pos::DUMMY_SP, prev_token_kind: PrevTokenKind::Other, - lookahead_buffer: Default::default(), - tokens_consumed: 0, restrictions: Restrictions::empty(), quote_depth: 0, parsing_token_tree: false, obsolete_set: HashSet::new(), directory: Directory { path: PathBuf::new(), ownership: DirectoryOwnership::Owned }, - open_braces: Vec::new(), root_module_name: None, expected_tokens: Vec::new(), - tts: Vec::new(), + tts: if tt.len() > 0 { vec![(tt, 0)] } else { Vec::new() }, desugar_doc_comments: desugar_doc_comments, - allow_interpolated_tts: true, }; let tok = parser.next_tok(); @@ -309,8 +288,8 @@ impl<'a> Parser<'a> { } fn next_tok(&mut self) -> TokenAndSpan { - 'outer: loop { - let mut tok = if let Some((tts, i)) = self.tts.pop() { + loop { + let tok = if let Some((tts, i)) = self.tts.pop() { let tt = tts.get_tt(i); if i + 1 < tts.len() { self.tts.push((tts, i + 1)); @@ -322,28 +301,14 @@ impl<'a> Parser<'a> { continue } } else { - self.reader.real_token() + TokenAndSpan { tok: token::Eof, sp: self.span } }; - loop { - let nt = match tok.tok { - token::Interpolated(ref nt) => nt.clone(), - token::DocComment(name) if self.desugar_doc_comments => { - self.tts.push((TokenTree::Token(tok.sp, token::DocComment(name)), 0)); - continue 'outer - } - _ => return tok, - }; - match *nt { - token::NtTT(TokenTree::Token(sp, ref t)) => { - tok = TokenAndSpan { tok: t.clone(), sp: sp }; - } - token::NtTT(ref tt) => { - self.tts.push((tt.clone(), 0)); - continue 'outer - } - _ => return tok, + match tok.tok { + token::DocComment(name) if self.desugar_doc_comments => { + self.tts.push((TokenTree::Token(tok.sp, token::DocComment(name)), 0)); } + _ => return tok, } } } @@ -892,17 +857,9 @@ impl<'a> Parser<'a> { _ => PrevTokenKind::Other, }; - let next = if self.lookahead_buffer.start == self.lookahead_buffer.end { - self.next_tok() - } else { - // Avoid token copies with `replace`. 
- let old_start = self.lookahead_buffer.start; - self.lookahead_buffer.start = (old_start + 1) % LOOKAHEAD_BUFFER_CAPACITY; - mem::replace(&mut self.lookahead_buffer.buffer[old_start], Default::default()) - }; + let next = self.next_tok(); self.span = next.sp; self.token = next.tok; - self.tokens_consumed += 1; self.expected_tokens.clear(); // check after each token self.check_unknown_macro_variable(); @@ -935,18 +892,20 @@ impl<'a> Parser<'a> { F: FnOnce(&token::Token) -> R, { if dist == 0 { - f(&self.token) - } else if dist < LOOKAHEAD_BUFFER_CAPACITY { - while self.lookahead_buffer.len() < dist { - self.lookahead_buffer.buffer[self.lookahead_buffer.end] = self.next_tok(); - self.lookahead_buffer.end = - (self.lookahead_buffer.end + 1) % LOOKAHEAD_BUFFER_CAPACITY; - } - let index = (self.lookahead_buffer.start + dist - 1) % LOOKAHEAD_BUFFER_CAPACITY; - f(&self.lookahead_buffer.buffer[index].tok) - } else { - self.bug("lookahead distance is too large"); + return f(&self.token); + } + let mut tok = token::Eof; + if let Some(&(ref tts, mut i)) = self.tts.last() { + i += dist - 1; + if i < tts.len() { + tok = match tts.get_tt(i) { + TokenTree::Token(_, tok) => tok, + TokenTree::Delimited(_, delimited) => token::OpenDelim(delimited.delim), + TokenTree::Sequence(..) => token::Dollar, + }; + } } + f(&tok) } pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> { self.sess.span_diagnostic.struct_span_fatal(self.span, m) @@ -2743,94 +2702,28 @@ impl<'a> Parser<'a> { // whether something will be a nonterminal or a seq // yet. match self.token { - token::Eof => { - let mut err: DiagnosticBuilder<'a> = - self.diagnostic().struct_span_err(self.span, - "this file contains an un-closed delimiter"); - for &(_, sp) in &self.open_braces { - err.span_help(sp, "did you mean to close this delimiter?"); - } - - Err(err) - }, token::OpenDelim(delim) => { - if self.tts.last().map(|&(_, i)| i == 1).unwrap_or(false) { + if self.quote_depth == 0 && self.tts.last().map(|&(_, i)| i == 1).unwrap_or(false) { let tt = self.tts.pop().unwrap().0; self.bump(); - return Ok(if self.allow_interpolated_tts { - // avoid needlessly reparsing token trees in recursive macro expansions - TokenTree::Token(tt.span(), token::Interpolated(Rc::new(token::NtTT(tt)))) - } else { - tt - }); + return Ok(tt); } let parsing_token_tree = ::std::mem::replace(&mut self.parsing_token_tree, true); - // The span for beginning of the delimited section - let pre_span = self.span; - - // Parse the open delimiter. - self.open_braces.push((delim, self.span)); let open_span = self.span; self.bump(); - - // Parse the token trees within the delimiters. - // We stop at any delimiter so we can try to recover if the user - // uses an incorrect delimiter. let tts = self.parse_seq_to_before_tokens(&[&token::CloseDelim(token::Brace), &token::CloseDelim(token::Paren), &token::CloseDelim(token::Bracket)], SeqSep::none(), |p| p.parse_token_tree(), |mut e| e.emit()); + self.parsing_token_tree = parsing_token_tree; let close_span = self.span; - // Expand to cover the entire delimited token tree - let span = Span { hi: close_span.hi, ..pre_span }; - - match self.token { - // Correct delimiter. - token::CloseDelim(d) if d == delim => { - self.open_braces.pop().unwrap(); - - // Parse the close delimiter. - self.bump(); - } - // Incorrect delimiter. 
- token::CloseDelim(other) => { - let token_str = self.this_token_to_string(); - let mut err = self.diagnostic().struct_span_err(self.span, - &format!("incorrect close delimiter: `{}`", token_str)); - // This is a conservative error: only report the last unclosed delimiter. - // The previous unclosed delimiters could actually be closed! The parser - // just hasn't gotten to them yet. - if let Some(&(_, sp)) = self.open_braces.last() { - err.span_note(sp, "unclosed delimiter"); - }; - err.emit(); - - self.open_braces.pop().unwrap(); - - // If the incorrect delimiter matches an earlier opening - // delimiter, then don't consume it (it can be used to - // close the earlier one). Otherwise, consume it. - // E.g., we try to recover from: - // fn foo() { - // bar(baz( - // } // Incorrect delimiter but matches the earlier `{` - if !self.open_braces.iter().any(|&(b, _)| b == other) { - self.bump(); - } - } - token::Eof => { - // Silently recover, the EOF token will be seen again - // and an error emitted then. Thus we don't pop from - // self.open_braces here. - }, - _ => {} - } + self.bump(); - self.parsing_token_tree = parsing_token_tree; + let span = Span { lo: open_span.lo, ..close_span }; Ok(TokenTree::Delimited(span, Rc::new(Delimited { delim: delim, open_span: open_span, @@ -2838,21 +2731,9 @@ impl<'a> Parser<'a> { close_span: close_span, }))) }, - token::CloseDelim(_) => { - // An unexpected closing delimiter (i.e., there is no - // matching opening delimiter). - let token_str = self.this_token_to_string(); - let err = self.diagnostic().struct_span_err(self.span, - &format!("unexpected close delimiter: `{}`", token_str)); - Err(err) - }, - /* we ought to allow different depths of unquotation */ - token::Dollar | token::SubstNt(..) if self.quote_depth > 0 => { - self.parse_unquoted() - } - _ => { - Ok(TokenTree::Token(self.span, self.bump_and_get())) - } + token::CloseDelim(_) | token::Eof => unreachable!(), + token::Dollar | token::SubstNt(..) if self.quote_depth > 0 => self.parse_unquoted(), + _ => Ok(TokenTree::Token(self.span, self.bump_and_get())), } } |
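
The net effect of the patch: the `Reader` trait, `TtReader`, and the parser's `LookaheadBuffer` are removed; `StringReader` now lexes directly into token trees via the new `tokentrees.rs`, and `Parser::new` takes a `Vec<TokenTree>` instead of a boxed reader. Below is a minimal sketch of the resulting call sequence, modeled on the new bodies of `filemap_to_tts` and `tts_to_parser` in `parse/mod.rs`; the helper name and the exact crate-internal import paths are illustrative assumptions, not part of the patch.

```rust
use std::rc::Rc;

use errors::FatalError;
use parse::{lexer, ParseSess};
use parse::parser::Parser;
use syntax_pos::FileMap;
use tokenstream::TokenTree;

// Hypothetical helper combining the new `filemap_to_tts` and `tts_to_parser`
// flows; `sess` and `filemap` are assumed to come from the caller
// (e.g. a `ParseSess` and `codemap.new_filemap(...)`).
fn filemap_to_parser_sketch<'a>(sess: &'a ParseSess, filemap: Rc<FileMap>) -> Parser<'a> {
    // The `Reader` trait is gone: lexing goes through `StringReader` directly,
    // which now holds the `ParseSess` rather than just a diagnostic handler.
    let mut srdr = lexer::StringReader::new(sess, filemap);
    // Prime `srdr.token`/`srdr.span` with the first non-whitespace, non-comment token.
    srdr.real_token();

    // Lex the whole file into token trees; this match is what `panictry!`
    // expands to in `filemap_to_tts` (emit the diagnostic, then abort).
    let tts: Vec<TokenTree> = match srdr.parse_all_token_trees() {
        Ok(tts) => tts,
        Err(mut e) => {
            e.emit();
            panic!(FatalError);
        }
    };

    // The parser now consumes the token trees directly; no boxed reader is
    // threaded through, matching the new `tts_to_parser`.
    let mut p = Parser::new(sess, tts, None, false);
    p.check_unknown_macro_variable();
    p
}
```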
