diff options
| author | Esteban Küber <esteban@kuber.com.ar> | 2019-07-24 16:10:42 -0700 |
|---|---|---|
| committer | Esteban Küber <esteban@kuber.com.ar> | 2019-07-24 16:10:42 -0700 |
| commit | 70c817aee3aa204122b64cdfc2db05fa182da1c5 (patch) | |
| tree | 60a0178103feb7ad6c09aa86eb115dfb8eeabc2e /src/libsyntax/parse/lexer | |
| parent | 27a6a304e2baaabca88059753f020377f2476978 (diff) | |
| download | rust-70c817aee3aa204122b64cdfc2db05fa182da1c5.tar.gz rust-70c817aee3aa204122b64cdfc2db05fa182da1c5.zip | |
Allow lexer to recover from some homoglyphs
Diffstat (limited to 'src/libsyntax/parse/lexer')
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 5 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/unicode_chars.rs | 65 |
2 files changed, 37 insertions, 33 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index b97801a50d4..412ed8f04b3 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -389,7 +389,10 @@ impl<'a> StringReader<'a> { self.pos, "unknown start of token", c); - unicode_chars::check_for_substitution(self, start, c, &mut err); + if let Some(t) = unicode_chars::check_for_substitution(self, start, c, &mut err) { + err.emit(); + return Ok(t); + } return Err(err) } }; diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index b728a9e1988..bfa1606a0d2 100644 --- a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -4,6 +4,7 @@ use super::StringReader; use errors::{Applicability, DiagnosticBuilder}; use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION}; +use crate::parse::token; #[rustfmt::skip] // for line breaks const UNICODE_ARRAY: &[(char, &str, char)] = &[ @@ -297,32 +298,32 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[ ('>', "Fullwidth Greater-Than Sign", '>'), ]; -const ASCII_ARRAY: &[(char, &str)] = &[ - (' ', "Space"), - ('_', "Underscore"), - ('-', "Minus/Hyphen"), - (',', "Comma"), - (';', "Semicolon"), - (':', "Colon"), - ('!', "Exclamation Mark"), - ('?', "Question Mark"), - ('.', "Period"), - ('\'', "Single Quote"), - ('"', "Quotation Mark"), - ('(', "Left Parenthesis"), - (')', "Right Parenthesis"), - ('[', "Left Square Bracket"), - (']', "Right Square Bracket"), - ('{', "Left Curly Brace"), - ('}', "Right Curly Brace"), - ('*', "Asterisk"), - ('/', "Slash"), - ('\\', "Backslash"), - ('&', "Ampersand"), - ('+', "Plus Sign"), - ('<', "Less-Than Sign"), - ('=', "Equals Sign"), - ('>', "Greater-Than Sign"), +const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[ + (' ', "Space", Some(token::Whitespace)), + ('_', "Underscore", None), + ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))), + (',', "Comma", Some(token::Comma)), + (';', "Semicolon", Some(token::Semi)), + (':', "Colon", Some(token::Colon)), + ('!', "Exclamation Mark", Some(token::Not)), + ('?', "Question Mark", Some(token::Question)), + ('.', "Period", Some(token::Dot)), + ('\'', "Single Quote", None), // Literals are already lexed by this point, so we can't recover + ('"', "Quotation Mark", None), // gracefully just by spitting the correct token out. + ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))), + (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))), + ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))), + (']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))), + ('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))), + ('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))), + ('*', "Asterisk", Some(token::BinOp(token::Star))), + ('/', "Slash", Some(token::BinOp(token::Slash))), + ('\\', "Backslash", None), + ('&', "Ampersand", Some(token::BinOp(token::And))), + ('+', "Plus Sign", Some(token::BinOp(token::Plus))), + ('<', "Less-Than Sign", Some(token::Lt)), + ('=', "Equals Sign", Some(token::Eq)), + ('>', "Greater-Than Sign", Some(token::Gt)), ]; crate fn check_for_substitution<'a>( @@ -330,20 +331,20 @@ crate fn check_for_substitution<'a>( pos: BytePos, ch: char, err: &mut DiagnosticBuilder<'a>, -) -> bool { +) -> Option<token::TokenKind> { let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) { Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char), - None => return false, + None => return None, }; let span = Span::new(pos, pos + Pos::from_usize(ch.len_utf8()), NO_EXPANSION); - let ascii_name = match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) { - Some((_ascii_char, ascii_name)) => ascii_name, + let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) { + Some((_ascii_char, ascii_name, token)) => (ascii_name, token), None => { let msg = format!("substitution character not found for '{}'", ch); reader.sess.span_diagnostic.span_bug_no_panic(span, &msg); - return false; + return None; } }; @@ -371,7 +372,7 @@ crate fn check_for_substitution<'a>( ); err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect); } - true + token.clone() } /// Extract string if found at current position with given delimiters |
