diff options
| author | bors <bors@rust-lang.org> | 2016-04-27 13:49:45 -0700 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2016-04-27 13:49:45 -0700 |
| commit | cda7c1cf2463443aee4a2f51a5141bc7ce4a4f97 (patch) | |
| tree | 52be2ea17940cd98bbdfdc205d2552ce71eb127d | |
| parent | b52d76a08528273b218f168753ed846ecfb59aec (diff) | |
| parent | 6887202ea3a1d3e3df0c88c07c754defd87b9712 (diff) | |
| download | rust-cda7c1cf2463443aee4a2f51a5141bc7ce4a4f97.tar.gz rust-cda7c1cf2463443aee4a2f51a5141bc7ce4a4f97.zip | |
Auto merge of #33199 - mitaa:tokenize-responsibly, r=nrc
Make some fatal lexer errors recoverable. I've kept the changes to a minimum since I'm not really sure if this approach is acceptable. Fixes #12834. cc @nrc
| -rw-r--r-- | src/librustdoc/html/highlight.rs | 26 | ||||
| -rw-r--r-- | src/libsyntax/errors/mod.rs | 2 | ||||
| -rw-r--r-- | src/libsyntax/ext/tt/transcribe.rs | 4 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 169 | ||||
| -rw-r--r-- | src/test/rustdoc/issue-12834.rs | 21 |
5 files changed, 148 insertions, 74 deletions
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 2f0ae540fbf..789239c3ab0 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -29,9 +29,11 @@ pub fn render_with_highlighting(src: &str, class: Option<&str>, id: Option<&str> let mut out = Vec::new(); write_header(class, id, &mut out).unwrap(); - write_source(&sess, - lexer::StringReader::new(&sess.span_diagnostic, fm), - &mut out).unwrap(); + if let Err(_) = write_source(&sess, + lexer::StringReader::new(&sess.span_diagnostic, fm), + &mut out) { + return format!("<pre>{}</pre>", src) + } write_footer(&mut out).unwrap(); String::from_utf8_lossy(&out[..]).into_owned() } @@ -39,15 +41,15 @@ pub fn render_with_highlighting(src: &str, class: Option<&str>, id: Option<&str> /// Highlights `src`, returning the HTML output. Returns only the inner html to /// be inserted into an element. C.f., `render_with_highlighting` which includes /// an enclosing `<pre>` block. -pub fn render_inner_with_highlighting(src: &str) -> String { +pub fn render_inner_with_highlighting(src: &str) -> io::Result<String> { let sess = parse::ParseSess::new(); let fm = sess.codemap().new_filemap("<stdin>".to_string(), src.to_string()); let mut out = Vec::new(); write_source(&sess, lexer::StringReader::new(&sess.span_diagnostic, fm), - &mut out).unwrap(); - String::from_utf8_lossy(&out[..]).into_owned() + &mut out)?; + Ok(String::from_utf8_lossy(&out[..]).into_owned()) } /// Exhausts the `lexer` writing the output into `out`. 
@@ -65,7 +67,17 @@ fn write_source(sess: &parse::ParseSess, let mut is_macro = false; let mut is_macro_nonterminal = false; loop { - let next = lexer.next_token(); + let next = match lexer.try_next_token() { + Ok(tok) => tok, + Err(_) => { + lexer.emit_fatal_errors(); + lexer.span_diagnostic.struct_warn("Backing out of syntax highlighting") + .note("You probably did not intend to render this \ + as a rust code-block") + .emit(); + return Err(io::Error::new(io::ErrorKind::Other, "")) + }, + }; let snip = |sp| sess.codemap().span_to_snippet(sp).unwrap(); diff --git a/src/libsyntax/errors/mod.rs b/src/libsyntax/errors/mod.rs index acf65fe15b1..792828b3054 100644 --- a/src/libsyntax/errors/mod.rs +++ b/src/libsyntax/errors/mod.rs @@ -177,6 +177,7 @@ impl error::Error for ExplicitBug { /// Used for emitting structured error messages and other diagnostic information. #[must_use] +#[derive(Clone)] pub struct DiagnosticBuilder<'a> { emitter: &'a RefCell<Box<Emitter>>, level: Level, @@ -187,6 +188,7 @@ pub struct DiagnosticBuilder<'a> { } /// For example a note attached to an error. +#[derive(Clone)] struct SubDiagnostic { level: Level, message: String, diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs index 7f53d0f412c..6b3b5ce9de9 100644 --- a/src/libsyntax/ext/tt/transcribe.rs +++ b/src/libsyntax/ext/tt/transcribe.rs @@ -12,7 +12,7 @@ use self::LockstepIterSize::*; use ast; use ast::{TokenTree, Ident, Name}; use codemap::{Span, DUMMY_SP}; -use errors::Handler; +use errors::{Handler, DiagnosticBuilder}; use ext::tt::macro_parser::{NamedMatch, MatchedSeq, MatchedNonterminal}; use parse::token::{DocComment, MatchNt, SubstNt}; use parse::token::{Token, NtIdent, SpecialMacroVar}; @@ -50,6 +50,7 @@ pub struct TtReader<'a> { pub cur_span: Span, /// Transform doc comments. Only useful in macro invocations pub desugar_doc_comments: bool, + pub fatal_errs: Vec<DiagnosticBuilder<'a>>, } /// This can do Macro-By-Example transcription. 
On the other hand, if @@ -99,6 +100,7 @@ pub fn new_tt_reader_with_doc_flag(sp_diag: &Handler, /* dummy values, never read: */ cur_tok: token::Eof, cur_span: DUMMY_SP, + fatal_errs: Vec::new(), }; tt_next_token(&mut r); /* get cur_tok and cur_span set up */ r diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 2eda13adcb5..881663a056c 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -29,24 +29,42 @@ mod unicode_chars; pub trait Reader { fn is_eof(&self) -> bool; - fn next_token(&mut self) -> TokenAndSpan; + fn try_next_token(&mut self) -> Result<TokenAndSpan, ()>; + fn next_token(&mut self) -> TokenAndSpan where Self: Sized { + let res = self.try_next_token(); + self.unwrap_or_abort(res) + } /// Report a fatal error with the current span. fn fatal(&self, &str) -> FatalError; /// Report a non-fatal error with the current span. fn err(&self, &str); + fn emit_fatal_errors(&mut self); + fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan { + match res { + Ok(tok) => tok, + Err(_) => { + self.emit_fatal_errors(); + panic!(FatalError); + } + } + } fn peek(&self) -> TokenAndSpan; /// Get a token the parser cares about. - fn real_token(&mut self) -> TokenAndSpan { - let mut t = self.next_token(); + fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> { + let mut t = self.try_next_token()?; loop { match t.tok { token::Whitespace | token::Comment | token::Shebang(_) => { - t = self.next_token(); + t = self.try_next_token()?; } _ => break, } } - t + Ok(t) + } + fn real_token(&mut self) -> TokenAndSpan { + let res = self.try_real_token(); + self.unwrap_or_abort(res) } } @@ -70,7 +88,7 @@ pub struct StringReader<'a> { // cached: pub peek_tok: token::Token, pub peek_span: Span, - + pub fatal_errs: Vec<DiagnosticBuilder<'a>>, // cache a direct reference to the source text, so that we don't have to // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time. 
source_text: Rc<String>, @@ -81,13 +99,14 @@ impl<'a> Reader for StringReader<'a> { self.curr.is_none() } /// Return the next token. EFFECT: advances the string_reader. - fn next_token(&mut self) -> TokenAndSpan { + fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> { + assert!(self.fatal_errs.is_empty()); let ret_val = TokenAndSpan { tok: replace(&mut self.peek_tok, token::Underscore), sp: self.peek_span, }; - self.advance_token(); - ret_val + self.advance_token()?; + Ok(ret_val) } fn fatal(&self, m: &str) -> FatalError { self.fatal_span(self.peek_span, m) @@ -95,6 +114,12 @@ impl<'a> Reader for StringReader<'a> { fn err(&self, m: &str) { self.err_span(self.peek_span, m) } + fn emit_fatal_errors(&mut self) { + for err in &mut self.fatal_errs { + err.emit(); + } + self.fatal_errs.clear(); + } fn peek(&self) -> TokenAndSpan { // FIXME(pcwalton): Bad copy! TokenAndSpan { @@ -108,10 +133,11 @@ impl<'a> Reader for TtReader<'a> { fn is_eof(&self) -> bool { self.cur_tok == token::Eof } - fn next_token(&mut self) -> TokenAndSpan { + fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> { + assert!(self.fatal_errs.is_empty()); let r = tt_next_token(self); debug!("TtReader: r={:?}", r); - r + Ok(r) } fn fatal(&self, m: &str) -> FatalError { self.sp_diag.span_fatal(self.cur_span, m) @@ -119,6 +145,12 @@ impl<'a> Reader for TtReader<'a> { fn err(&self, m: &str) { self.sp_diag.span_err(self.cur_span, m); } + fn emit_fatal_errors(&mut self) { + for err in &mut self.fatal_errs { + err.emit(); + } + self.fatal_errs.clear(); + } fn peek(&self) -> TokenAndSpan { TokenAndSpan { tok: self.cur_tok.clone(), @@ -151,6 +183,7 @@ impl<'a> StringReader<'a> { peek_tok: token::Eof, peek_span: codemap::DUMMY_SP, source_text: source_text, + fatal_errs: Vec::new(), }; sr.bump(); sr @@ -160,7 +193,10 @@ impl<'a> StringReader<'a> { filemap: Rc<codemap::FileMap>) -> StringReader<'b> { let mut sr = StringReader::new_raw(span_diagnostic, filemap); - sr.advance_token(); + if let Err(_) = 
sr.advance_token() { + sr.emit_fatal_errors(); + panic!(FatalError); + } sr } @@ -249,7 +285,7 @@ impl<'a> StringReader<'a> { /// Advance peek_tok and peek_span to refer to the next token, and /// possibly update the interner. - fn advance_token(&mut self) { + fn advance_token(&mut self) -> Result<(), ()> { match self.scan_whitespace_or_comment() { Some(comment) => { self.peek_span = comment.sp; @@ -261,11 +297,12 @@ impl<'a> StringReader<'a> { self.peek_span = codemap::mk_sp(self.filemap.end_pos, self.filemap.end_pos); } else { let start_bytepos = self.last_pos; - self.peek_tok = self.next_token_inner(); + self.peek_tok = self.next_token_inner()?; self.peek_span = codemap::mk_sp(start_bytepos, self.last_pos); }; } } + Ok(()) } fn byte_offset(&self, pos: BytePos) -> BytePos { @@ -1013,7 +1050,7 @@ impl<'a> StringReader<'a> { /// Return the next token from the string, advances the input past that /// token, and updates the interner - fn next_token_inner(&mut self) -> token::Token { + fn next_token_inner(&mut self) -> Result<token::Token, ()> { let c = self.curr; if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) { @@ -1033,32 +1070,32 @@ impl<'a> StringReader<'a> { self.bump(); } - return self.with_str_from(start, |string| { + return Ok(self.with_str_from(start, |string| { if string == "_" { token::Underscore } else { // FIXME: perform NFKC normalization here. (Issue #2253) token::Ident(str_to_ident(string)) } - }); + })); } if is_dec_digit(c) { let num = self.scan_number(c.unwrap()); let suffix = self.scan_optional_raw_name(); debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix); - return token::Literal(num, suffix); + return Ok(token::Literal(num, suffix)); } match c.expect("next_token_inner called at EOF") { // One-byte tokens. ';' => { self.bump(); - return token::Semi; + return Ok(token::Semi); } ',' => { self.bump(); - return token::Comma; + return Ok(token::Comma); } '.' 
=> { self.bump(); @@ -1066,67 +1103,67 @@ impl<'a> StringReader<'a> { self.bump(); if self.curr_is('.') { self.bump(); - token::DotDotDot + Ok(token::DotDotDot) } else { - token::DotDot + Ok(token::DotDot) } } else { - token::Dot + Ok(token::Dot) }; } '(' => { self.bump(); - return token::OpenDelim(token::Paren); + return Ok(token::OpenDelim(token::Paren)); } ')' => { self.bump(); - return token::CloseDelim(token::Paren); + return Ok(token::CloseDelim(token::Paren)); } '{' => { self.bump(); - return token::OpenDelim(token::Brace); + return Ok(token::OpenDelim(token::Brace)); } '}' => { self.bump(); - return token::CloseDelim(token::Brace); + return Ok(token::CloseDelim(token::Brace)); } '[' => { self.bump(); - return token::OpenDelim(token::Bracket); + return Ok(token::OpenDelim(token::Bracket)); } ']' => { self.bump(); - return token::CloseDelim(token::Bracket); + return Ok(token::CloseDelim(token::Bracket)); } '@' => { self.bump(); - return token::At; + return Ok(token::At); } '#' => { self.bump(); - return token::Pound; + return Ok(token::Pound); } '~' => { self.bump(); - return token::Tilde; + return Ok(token::Tilde); } '?' => { self.bump(); - return token::Question; + return Ok(token::Question); } ':' => { self.bump(); if self.curr_is(':') { self.bump(); - return token::ModSep; + return Ok(token::ModSep); } else { - return token::Colon; + return Ok(token::Colon); } } '$' => { self.bump(); - return token::Dollar; + return Ok(token::Dollar); } // Multi-byte tokens. @@ -1134,21 +1171,21 @@ impl<'a> StringReader<'a> { self.bump(); if self.curr_is('=') { self.bump(); - return token::EqEq; + return Ok(token::EqEq); } else if self.curr_is('>') { self.bump(); - return token::FatArrow; + return Ok(token::FatArrow); } else { - return token::Eq; + return Ok(token::Eq); } } '!' 
=> { self.bump(); if self.curr_is('=') { self.bump(); - return token::Ne; + return Ok(token::Ne); } else { - return token::Not; + return Ok(token::Not); } } '<' => { @@ -1156,21 +1193,21 @@ impl<'a> StringReader<'a> { match self.curr.unwrap_or('\x00') { '=' => { self.bump(); - return token::Le; + return Ok(token::Le); } '<' => { - return self.binop(token::Shl); + return Ok(self.binop(token::Shl)); } '-' => { self.bump(); match self.curr.unwrap_or('\x00') { _ => { - return token::LArrow; + return Ok(token::LArrow); } } } _ => { - return token::Lt; + return Ok(token::Lt); } } } @@ -1179,13 +1216,13 @@ impl<'a> StringReader<'a> { match self.curr.unwrap_or('\x00') { '=' => { self.bump(); - return token::Ge; + return Ok(token::Ge); } '>' => { - return self.binop(token::Shr); + return Ok(self.binop(token::Shr)); } _ => { - return token::Gt; + return Ok(token::Gt); } } } @@ -1233,7 +1270,7 @@ impl<'a> StringReader<'a> { self.err_span_(start, last_bpos, "lifetimes cannot use keyword names"); } - return token::Lifetime(ident); + return Ok(token::Lifetime(ident)); } let valid = self.scan_char_or_byte(start, @@ -1255,7 +1292,7 @@ impl<'a> StringReader<'a> { }; self.bump(); // advance curr past token let suffix = self.scan_optional_raw_name(); - return token::Literal(token::Char(id), suffix); + return Ok(token::Literal(token::Char(id), suffix)); } 'b' => { self.bump(); @@ -1266,7 +1303,7 @@ impl<'a> StringReader<'a> { _ => unreachable!(), // Should have been a token::Ident above. 
}; let suffix = self.scan_optional_raw_name(); - return token::Literal(lit, suffix); + return Ok(token::Literal(lit, suffix)); } '"' => { let start_bpos = self.last_pos; @@ -1297,7 +1334,7 @@ impl<'a> StringReader<'a> { }; self.bump(); let suffix = self.scan_optional_raw_name(); - return token::Literal(token::Str_(id), suffix); + return Ok(token::Literal(token::Str_(id), suffix)); } 'r' => { let start_bpos = self.last_pos; @@ -1368,24 +1405,24 @@ impl<'a> StringReader<'a> { token::intern("??") }; let suffix = self.scan_optional_raw_name(); - return token::Literal(token::StrRaw(id, hash_count), suffix); + return Ok(token::Literal(token::StrRaw(id, hash_count), suffix)); } '-' => { if self.nextch_is('>') { self.bump(); self.bump(); - return token::RArrow; + return Ok(token::RArrow); } else { - return self.binop(token::Minus); + return Ok(self.binop(token::Minus)); } } '&' => { if self.nextch_is('&') { self.bump(); self.bump(); - return token::AndAnd; + return Ok(token::AndAnd); } else { - return self.binop(token::And); + return Ok(self.binop(token::And)); } } '|' => { @@ -1393,27 +1430,27 @@ impl<'a> StringReader<'a> { Some('|') => { self.bump(); self.bump(); - return token::OrOr; + return Ok(token::OrOr); } _ => { - return self.binop(token::Or); + return Ok(self.binop(token::Or)); } } } '+' => { - return self.binop(token::Plus); + return Ok(self.binop(token::Plus)); } '*' => { - return self.binop(token::Star); + return Ok(self.binop(token::Star)); } '/' => { - return self.binop(token::Slash); + return Ok(self.binop(token::Slash)); } '^' => { - return self.binop(token::Caret); + return Ok(self.binop(token::Caret)); } '%' => { - return self.binop(token::Percent); + return Ok(self.binop(token::Percent)); } c => { let last_bpos = self.last_pos; @@ -1423,8 +1460,8 @@ impl<'a> StringReader<'a> { "unknown start of token", c); unicode_chars::check_for_substitution(&self, c, &mut err); - err.emit(); - panic!(FatalError); + self.fatal_errs.push(err); + Err(()) } } } diff 
--git a/src/test/rustdoc/issue-12834.rs b/src/test/rustdoc/issue-12834.rs new file mode 100644 index 00000000000..30dce27e738 --- /dev/null +++ b/src/test/rustdoc/issue-12834.rs @@ -0,0 +1,21 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Tests that failing to syntax highlight a rust code-block doesn't cause +// rustdoc to fail, while still rendering the code-block (without highlighting). + + +// @has issue_12834/fn.foo.html +// @has - //pre 'a + b ' + +/// ``` +/// a + b ∈ Self ∀ a, b ∈ Self +/// ``` +pub fn foo() {} |
