diff options
| author | Esteban Küber <esteban@kuber.com.ar> | 2019-07-25 11:22:46 -0700 |
|---|---|---|
| committer | Esteban Küber <esteban@kuber.com.ar> | 2019-07-25 12:36:51 -0700 |
| commit | 684497648ae22a69df80d410de643385c2cc86d4 (patch) | |
| tree | 6152caa927a031bd4aa6506bbb21ff7bbcd72d58 /src/libsyntax/parse/lexer/unicode_chars.rs | |
| parent | 70c817aee3aa204122b64cdfc2db05fa182da1c5 (diff) | |
| download | rust-684497648ae22a69df80d410de643385c2cc86d4.tar.gz rust-684497648ae22a69df80d410de643385c2cc86d4.zip | |
review comments: add FIXME comments and formatting
Diffstat (limited to 'src/libsyntax/parse/lexer/unicode_chars.rs')
| -rw-r--r-- | src/libsyntax/parse/lexer/unicode_chars.rs | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index bfa1606a0d2..eaa736c6a35 100644 --- a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -3,7 +3,7 @@ use super::StringReader; use errors::{Applicability, DiagnosticBuilder}; -use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION}; +use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION, symbol::kw}; use crate::parse::token; #[rustfmt::skip] // for line breaks @@ -298,9 +298,13 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[ ('>', "Fullwidth Greater-Than Sign", '>'), ]; +// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of +// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`. +// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add +// fancier error recovery to it, as there will be less overall work to do this way. const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[ (' ', "Space", Some(token::Whitespace)), - ('_', "Underscore", None), + ('_', "Underscore", Some(token::Ident(kw::Underscore, false))), ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))), (',', "Comma", Some(token::Comma)), (';', "Semicolon", Some(token::Semi)), @@ -308,8 +312,6 @@ const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[ ('!', "Exclamation Mark", Some(token::Not)), ('?', "Question Mark", Some(token::Question)), ('.', "Period", Some(token::Dot)), - ('\'', "Single Quote", None), // Literals are already lexed by this point, so we can't recover - ('"', "Quotation Mark", None), // gracefully just by spitting the correct token out. ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))), (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))), ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))), @@ -324,6 +326,10 @@ const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[ ('<', "Less-Than Sign", Some(token::Lt)), ('=', "Equals Sign", Some(token::Eq)), ('>', "Greater-Than Sign", Some(token::Gt)), + // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by + // spitting the correct token out. + ('\'', "Single Quote", None), + ('"', "Quotation Mark", None), ]; crate fn check_for_substitution<'a>( |
