From b5e35b128efeed4bfdb4b1ee9d0697389ec9f164 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 30 Jul 2019 12:33:32 +0300 Subject: remove special code path for unknown tokens --- src/libsyntax/parse/lexer/mod.rs | 73 +++++++--------------------------------- 1 file changed, 13 insertions(+), 60 deletions(-) (limited to 'src/libsyntax/parse') diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 950b1b2ff53..c209ae1cb9f 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -3,7 +3,7 @@ use crate::parse::token::{self, Token, TokenKind}; use crate::symbol::{sym, Symbol}; use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char}; -use errors::{FatalError, Diagnostic, DiagnosticBuilder}; +use errors::{FatalError, DiagnosticBuilder}; use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION}; use rustc_lexer::Base; use rustc_lexer::unescape; @@ -39,7 +39,6 @@ pub struct StringReader<'a> { pos: BytePos, /// Stop reading src at this index. end_src_index: usize, - fatal_errs: Vec<DiagnosticBuilder<'a>>, /// Source text to tokenize. src: Lrc<String>, override_span: Option<Span>, @@ -62,7 +61,6 @@ impl<'a> StringReader<'a> { pos: source_file.start_pos, end_src_index: src.len(), src, - fatal_errs: Vec::new(), override_span, } } @@ -89,29 +87,17 @@ impl<'a> StringReader<'a> { self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION)) } - fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token { - match res { - Ok(tok) => tok, - Err(_) => { - self.emit_fatal_errors(); - FatalError.raise(); - } - } - } - /// Returns the next token, including trivia like whitespace or comments. /// /// `Err(())` means that some errors were encountered, which can be /// retrieved using `buffer_fatal_errors`. 
- pub fn try_next_token(&mut self) -> Result<Token, ()> { - assert!(self.fatal_errs.is_empty()); - + pub fn next_token(&mut self) -> Token { let start_src_index = self.src_index(self.pos); let text: &str = &self.src[start_src_index..self.end_src_index]; if text.is_empty() { let span = self.mk_sp(self.pos, self.pos); - return Ok(Token::new(token::Eof, span)); + return Token::new(token::Eof, span); } { @@ -125,7 +111,7 @@ impl<'a> StringReader<'a> { let kind = token::Shebang(sym); let span = self.mk_sp(start, self.pos); - return Ok(Token::new(kind, span)); + return Token::new(kind, span); } } } @@ -139,39 +125,10 @@ impl<'a> StringReader<'a> { // This could use `?`, but that makes code significantly (10-20%) slower. // https://github.com/rust-lang/rust/issues/37939 - let kind = match self.cook_lexer_token(token.kind, start) { - Ok(it) => it, - Err(err) => return Err(self.fatal_errs.push(err)), - }; + let kind = self.cook_lexer_token(token.kind, start); let span = self.mk_sp(start, self.pos); - Ok(Token::new(kind, span)) - } - - /// Returns the next token, including trivia like whitespace or comments. - /// - /// Aborts in case of an error. - pub fn next_token(&mut self) -> Token { - let res = self.try_next_token(); - self.unwrap_or_abort(res) - } - - fn emit_fatal_errors(&mut self) { - for err in &mut self.fatal_errs { - err.emit(); - } - - self.fatal_errs.clear(); - } - - pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> { - let mut buffer = Vec::new(); - - for err in self.fatal_errs.drain(..) { - err.buffer(&mut buffer); - } - - buffer + Token::new(kind, span) } /// Report a fatal lexical error with a given span. 
@@ -218,8 +175,8 @@ impl<'a> StringReader<'a> { &self, token: rustc_lexer::TokenKind, start: BytePos, - ) -> Result<TokenKind, DiagnosticBuilder<'a>> { - let kind = match token { + ) -> TokenKind { + match token { rustc_lexer::TokenKind::LineComment => { let string = self.str_from(start); // comments with only more "/"s are not doc comments @@ -396,16 +353,12 @@ impl<'a> StringReader<'a> { // this should be inside `rustc_lexer`. However, we should first remove compound // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it, // as there will be less overall work to do this way. - return match unicode_chars::check_for_substitution(self, start, c, &mut err) { - Some(token) => { - err.emit(); - Ok(token) - } - None => Err(err), - } + let token = unicode_chars::check_for_substitution(self, start, c, &mut err) + .unwrap_or(token::Whitespace); + err.emit(); + token } - }; - Ok(kind) + } } fn cook_lexer_literal( -- cgit 1.4.1-3-g733a5 From 58ac81a60fe11868b0748a406d8e0b97efa4e8c5 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 30 Jul 2019 12:31:41 +0300 Subject: add unknown token --- src/librustc/ich/impls_syntax.rs | 3 ++- src/librustdoc/html/highlight.rs | 2 +- src/libsyntax/ext/proc_macro_server.rs | 2 +- src/libsyntax/parse/lexer/mod.rs | 2 +- src/libsyntax/parse/lexer/tokentrees.rs | 2 +- src/libsyntax/parse/token.rs | 4 +++- src/libsyntax/print/pprust.rs | 1 + 7 files changed, 10 insertions(+), 6 deletions(-) (limited to 'src/libsyntax/parse') diff --git a/src/librustc/ich/impls_syntax.rs b/src/librustc/ich/impls_syntax.rs index 0c9c9adcf9d..5cc8324b316 100644 --- a/src/librustc/ich/impls_syntax.rs +++ b/src/librustc/ich/impls_syntax.rs @@ -363,7 +363,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for token::TokenKind { } token::DocComment(val) | - token::Shebang(val) => val.hash_stable(hcx, hasher), + token::Shebang(val) | + token::Unknown(val) => val.hash_stable(hcx, hasher), } } } diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index 
8132074d6e0..92d85ef9cac 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -237,7 +237,7 @@ impl<'a> Classifier<'a> { return Ok(()); }, - token::Whitespace => Class::None, + token::Whitespace | token::Unknown(..) => Class::None, token::Comment => Class::Comment, token::DocComment(..) => Class::DocComment, diff --git a/src/libsyntax/ext/proc_macro_server.rs b/src/libsyntax/ext/proc_macro_server.rs index 8d0023c9ab1..36621ce7775 100644 --- a/src/libsyntax/ext/proc_macro_server.rs +++ b/src/libsyntax/ext/proc_macro_server.rs @@ -184,7 +184,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)> } OpenDelim(..) | CloseDelim(..) => unreachable!(), - Whitespace | Comment | Shebang(..) | Eof => unreachable!(), + Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => unreachable!(), } } } diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index c209ae1cb9f..e86d4c7fde6 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -354,7 +354,7 @@ impl<'a> StringReader<'a> { // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it, // as there will be less overall work to do this way. 
let token = unicode_chars::check_for_substitution(self, start, c, &mut err) - .unwrap_or(token::Whitespace); + .unwrap_or_else(|| token::Unknown(self.symbol_from(start))); err.emit(); token } diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs index 830fbec58de..37e67a2729e 100644 --- a/src/libsyntax/parse/lexer/tokentrees.rs +++ b/src/libsyntax/parse/lexer/tokentrees.rs @@ -217,7 +217,7 @@ impl<'a> TokenTreesReader<'a> { loop { let token = self.string_reader.next_token(); match token.kind { - token::Whitespace | token::Comment | token::Shebang(_) => { + token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => { self.joint_to_prev = NonJoint; } _ => { diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 73adb5c947c..be800b4de66 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -255,6 +255,8 @@ pub enum TokenKind { /// A comment. Comment, Shebang(ast::Name), + /// A completely invalid token which should be skipped. + Unknown(ast::Name), Eof, } @@ -603,7 +605,7 @@ impl Token { DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar | Question | OpenDelim(..) | CloseDelim(..) | Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) | - Whitespace | Comment | Shebang(..) | Eof => return None, + Whitespace | Comment | Shebang(..) | Unknown(..) 
| Eof => return None, }; Some(Token::new(kind, self.span.to(joint.span))) diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index 2ef8a919b9c..378ba1e4107 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -288,6 +288,7 @@ fn token_kind_to_string_ext(tok: &TokenKind, convert_dollar_crate: Option<Span>) token::Whitespace => " ".to_string(), token::Comment => "/* */".to_string(), token::Shebang(s) => format!("/* shebang: {}*/", s), + token::Unknown(s) => s.to_string(), token::Interpolated(ref nt) => nonterminal_to_string(nt), } -- cgit 1.4.1-3-g733a5