diff options
| author | bors <bors@rust-lang.org> | 2016-04-27 13:49:45 -0700 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2016-04-27 13:49:45 -0700 |
| commit | cda7c1cf2463443aee4a2f51a5141bc7ce4a4f97 (patch) | |
| tree | 52be2ea17940cd98bbdfdc205d2552ce71eb127d /src/libsyntax/parse | |
| parent | b52d76a08528273b218f168753ed846ecfb59aec (diff) | |
| parent | 6887202ea3a1d3e3df0c88c07c754defd87b9712 (diff) | |
| download | rust-cda7c1cf2463443aee4a2f51a5141bc7ce4a4f97.tar.gz rust-cda7c1cf2463443aee4a2f51a5141bc7ce4a4f97.zip | |
Auto merge of #33199 - mitaa:tokenize-responsibly, r=nrc
Make some fatal lexer errors recoverable I've kept the changes to a minimum since I'm not really sure if this approach is a acceptable. fixes #12834 cc @nrc
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 169 |
1 files changed, 103 insertions, 66 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 2eda13adcb5..881663a056c 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -29,24 +29,42 @@ mod unicode_chars; pub trait Reader { fn is_eof(&self) -> bool; - fn next_token(&mut self) -> TokenAndSpan; + fn try_next_token(&mut self) -> Result<TokenAndSpan, ()>; + fn next_token(&mut self) -> TokenAndSpan where Self: Sized { + let res = self.try_next_token(); + self.unwrap_or_abort(res) + } /// Report a fatal error with the current span. fn fatal(&self, &str) -> FatalError; /// Report a non-fatal error with the current span. fn err(&self, &str); + fn emit_fatal_errors(&mut self); + fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan { + match res { + Ok(tok) => tok, + Err(_) => { + self.emit_fatal_errors(); + panic!(FatalError); + } + } + } fn peek(&self) -> TokenAndSpan; /// Get a token the parser cares about. - fn real_token(&mut self) -> TokenAndSpan { - let mut t = self.next_token(); + fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> { + let mut t = self.try_next_token()?; loop { match t.tok { token::Whitespace | token::Comment | token::Shebang(_) => { - t = self.next_token(); + t = self.try_next_token()?; } _ => break, } } - t + Ok(t) + } + fn real_token(&mut self) -> TokenAndSpan { + let res = self.try_real_token(); + self.unwrap_or_abort(res) } } @@ -70,7 +88,7 @@ pub struct StringReader<'a> { // cached: pub peek_tok: token::Token, pub peek_span: Span, - + pub fatal_errs: Vec<DiagnosticBuilder<'a>>, // cache a direct reference to the source text, so that we don't have to // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time. source_text: Rc<String>, @@ -81,13 +99,14 @@ impl<'a> Reader for StringReader<'a> { self.curr.is_none() } /// Return the next token. EFFECT: advances the string_reader. - fn next_token(&mut self) -> TokenAndSpan { + fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> { + assert!(self.fatal_errs.is_empty()); let ret_val = TokenAndSpan { tok: replace(&mut self.peek_tok, token::Underscore), sp: self.peek_span, }; - self.advance_token(); - ret_val + self.advance_token()?; + Ok(ret_val) } fn fatal(&self, m: &str) -> FatalError { self.fatal_span(self.peek_span, m) @@ -95,6 +114,12 @@ impl<'a> Reader for StringReader<'a> { fn err(&self, m: &str) { self.err_span(self.peek_span, m) } + fn emit_fatal_errors(&mut self) { + for err in &mut self.fatal_errs { + err.emit(); + } + self.fatal_errs.clear(); + } fn peek(&self) -> TokenAndSpan { // FIXME(pcwalton): Bad copy! TokenAndSpan { @@ -108,10 +133,11 @@ impl<'a> Reader for TtReader<'a> { fn is_eof(&self) -> bool { self.cur_tok == token::Eof } - fn next_token(&mut self) -> TokenAndSpan { + fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> { + assert!(self.fatal_errs.is_empty()); let r = tt_next_token(self); debug!("TtReader: r={:?}", r); - r + Ok(r) } fn fatal(&self, m: &str) -> FatalError { self.sp_diag.span_fatal(self.cur_span, m) @@ -119,6 +145,12 @@ impl<'a> Reader for TtReader<'a> { fn err(&self, m: &str) { self.sp_diag.span_err(self.cur_span, m); } + fn emit_fatal_errors(&mut self) { + for err in &mut self.fatal_errs { + err.emit(); + } + self.fatal_errs.clear(); + } fn peek(&self) -> TokenAndSpan { TokenAndSpan { tok: self.cur_tok.clone(), @@ -151,6 +183,7 @@ impl<'a> StringReader<'a> { peek_tok: token::Eof, peek_span: codemap::DUMMY_SP, source_text: source_text, + fatal_errs: Vec::new(), }; sr.bump(); sr @@ -160,7 +193,10 @@ impl<'a> StringReader<'a> { filemap: Rc<codemap::FileMap>) -> StringReader<'b> { let mut sr = StringReader::new_raw(span_diagnostic, filemap); - sr.advance_token(); + if let Err(_) = sr.advance_token() { + sr.emit_fatal_errors(); + panic!(FatalError); + } sr } @@ -249,7 +285,7 @@ impl<'a> StringReader<'a> { /// Advance peek_tok and peek_span to refer to the next token, and /// possibly update the interner. - fn advance_token(&mut self) { + fn advance_token(&mut self) -> Result<(), ()> { match self.scan_whitespace_or_comment() { Some(comment) => { self.peek_span = comment.sp; @@ -261,11 +297,12 @@ impl<'a> StringReader<'a> { self.peek_span = codemap::mk_sp(self.filemap.end_pos, self.filemap.end_pos); } else { let start_bytepos = self.last_pos; - self.peek_tok = self.next_token_inner(); + self.peek_tok = self.next_token_inner()?; self.peek_span = codemap::mk_sp(start_bytepos, self.last_pos); }; } } + Ok(()) } fn byte_offset(&self, pos: BytePos) -> BytePos { @@ -1013,7 +1050,7 @@ impl<'a> StringReader<'a> { /// Return the next token from the string, advances the input past that /// token, and updates the interner - fn next_token_inner(&mut self) -> token::Token { + fn next_token_inner(&mut self) -> Result<token::Token, ()> { let c = self.curr; if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) { @@ -1033,32 +1070,32 @@ impl<'a> StringReader<'a> { self.bump(); } - return self.with_str_from(start, |string| { + return Ok(self.with_str_from(start, |string| { if string == "_" { token::Underscore } else { // FIXME: perform NFKC normalization here. (Issue #2253) token::Ident(str_to_ident(string)) } - }); + })); } if is_dec_digit(c) { let num = self.scan_number(c.unwrap()); let suffix = self.scan_optional_raw_name(); debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix); - return token::Literal(num, suffix); + return Ok(token::Literal(num, suffix)); } match c.expect("next_token_inner called at EOF") { // One-byte tokens. ';' => { self.bump(); - return token::Semi; + return Ok(token::Semi); } ',' => { self.bump(); - return token::Comma; + return Ok(token::Comma); } '.' => { self.bump(); @@ -1066,67 +1103,67 @@ impl<'a> StringReader<'a> { self.bump(); if self.curr_is('.') { self.bump(); - token::DotDotDot + Ok(token::DotDotDot) } else { - token::DotDot + Ok(token::DotDot) } } else { - token::Dot + Ok(token::Dot) }; } '(' => { self.bump(); - return token::OpenDelim(token::Paren); + return Ok(token::OpenDelim(token::Paren)); } ')' => { self.bump(); - return token::CloseDelim(token::Paren); + return Ok(token::CloseDelim(token::Paren)); } '{' => { self.bump(); - return token::OpenDelim(token::Brace); + return Ok(token::OpenDelim(token::Brace)); } '}' => { self.bump(); - return token::CloseDelim(token::Brace); + return Ok(token::CloseDelim(token::Brace)); } '[' => { self.bump(); - return token::OpenDelim(token::Bracket); + return Ok(token::OpenDelim(token::Bracket)); } ']' => { self.bump(); - return token::CloseDelim(token::Bracket); + return Ok(token::CloseDelim(token::Bracket)); } '@' => { self.bump(); - return token::At; + return Ok(token::At); } '#' => { self.bump(); - return token::Pound; + return Ok(token::Pound); } '~' => { self.bump(); - return token::Tilde; + return Ok(token::Tilde); } '?' => { self.bump(); - return token::Question; + return Ok(token::Question); } ':' => { self.bump(); if self.curr_is(':') { self.bump(); - return token::ModSep; + return Ok(token::ModSep); } else { - return token::Colon; + return Ok(token::Colon); } } '$' => { self.bump(); - return token::Dollar; + return Ok(token::Dollar); } // Multi-byte tokens. @@ -1134,21 +1171,21 @@ impl<'a> StringReader<'a> { self.bump(); if self.curr_is('=') { self.bump(); - return token::EqEq; + return Ok(token::EqEq); } else if self.curr_is('>') { self.bump(); - return token::FatArrow; + return Ok(token::FatArrow); } else { - return token::Eq; + return Ok(token::Eq); } } '!' => { self.bump(); if self.curr_is('=') { self.bump(); - return token::Ne; + return Ok(token::Ne); } else { - return token::Not; + return Ok(token::Not); } } '<' => { @@ -1156,21 +1193,21 @@ impl<'a> StringReader<'a> { match self.curr.unwrap_or('\x00') { '=' => { self.bump(); - return token::Le; + return Ok(token::Le); } '<' => { - return self.binop(token::Shl); + return Ok(self.binop(token::Shl)); } '-' => { self.bump(); match self.curr.unwrap_or('\x00') { _ => { - return token::LArrow; + return Ok(token::LArrow); } } } _ => { - return token::Lt; + return Ok(token::Lt); } } } @@ -1179,13 +1216,13 @@ impl<'a> StringReader<'a> { match self.curr.unwrap_or('\x00') { '=' => { self.bump(); - return token::Ge; + return Ok(token::Ge); } '>' => { - return self.binop(token::Shr); + return Ok(self.binop(token::Shr)); } _ => { - return token::Gt; + return Ok(token::Gt); } } } @@ -1233,7 +1270,7 @@ impl<'a> StringReader<'a> { self.err_span_(start, last_bpos, "lifetimes cannot use keyword names"); } - return token::Lifetime(ident); + return Ok(token::Lifetime(ident)); } let valid = self.scan_char_or_byte(start, @@ -1255,7 +1292,7 @@ impl<'a> StringReader<'a> { }; self.bump(); // advance curr past token let suffix = self.scan_optional_raw_name(); - return token::Literal(token::Char(id), suffix); + return Ok(token::Literal(token::Char(id), suffix)); } 'b' => { self.bump(); @@ -1266,7 +1303,7 @@ impl<'a> StringReader<'a> { _ => unreachable!(), // Should have been a token::Ident above. }; let suffix = self.scan_optional_raw_name(); - return token::Literal(lit, suffix); + return Ok(token::Literal(lit, suffix)); } '"' => { let start_bpos = self.last_pos; @@ -1297,7 +1334,7 @@ impl<'a> StringReader<'a> { }; self.bump(); let suffix = self.scan_optional_raw_name(); - return token::Literal(token::Str_(id), suffix); + return Ok(token::Literal(token::Str_(id), suffix)); } 'r' => { let start_bpos = self.last_pos; @@ -1368,24 +1405,24 @@ impl<'a> StringReader<'a> { token::intern("??") }; let suffix = self.scan_optional_raw_name(); - return token::Literal(token::StrRaw(id, hash_count), suffix); + return Ok(token::Literal(token::StrRaw(id, hash_count), suffix)); } '-' => { if self.nextch_is('>') { self.bump(); self.bump(); - return token::RArrow; + return Ok(token::RArrow); } else { - return self.binop(token::Minus); + return Ok(self.binop(token::Minus)); } } '&' => { if self.nextch_is('&') { self.bump(); self.bump(); - return token::AndAnd; + return Ok(token::AndAnd); } else { - return self.binop(token::And); + return Ok(self.binop(token::And)); } } '|' => { @@ -1393,27 +1430,27 @@ impl<'a> StringReader<'a> { Some('|') => { self.bump(); self.bump(); - return token::OrOr; + return Ok(token::OrOr); } _ => { - return self.binop(token::Or); + return Ok(self.binop(token::Or)); } } } '+' => { - return self.binop(token::Plus); + return Ok(self.binop(token::Plus)); } '*' => { - return self.binop(token::Star); + return Ok(self.binop(token::Star)); } '/' => { - return self.binop(token::Slash); + return Ok(self.binop(token::Slash)); } '^' => { - return self.binop(token::Caret); + return Ok(self.binop(token::Caret)); } '%' => { - return self.binop(token::Percent); + return Ok(self.binop(token::Percent)); } c => { let last_bpos = self.last_pos; @@ -1423,8 +1460,8 @@ impl<'a> StringReader<'a> { "unknown start of token", c); unicode_chars::check_for_substitution(&self, c, &mut err); - err.emit(); - panic!(FatalError); + self.fatal_errs.push(err); + Err(()) } } } |
