author     Esteban Küber <esteban@kuber.com.ar>  2019-07-25 11:22:46 -0700
committer  Esteban Küber <esteban@kuber.com.ar>  2019-07-25 12:36:51 -0700
commit     684497648ae22a69df80d410de643385c2cc86d4 (patch)
tree       6152caa927a031bd4aa6506bbb21ff7bbcd72d58 /src/libsyntax/parse/lexer/unicode_chars.rs
parent     70c817aee3aa204122b64cdfc2db05fa182da1c5 (diff)
review comments: add FIXME comments and formatting
Diffstat (limited to 'src/libsyntax/parse/lexer/unicode_chars.rs')
-rw-r--r--  src/libsyntax/parse/lexer/unicode_chars.rs | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
index bfa1606a0d2..eaa736c6a35 100644
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ b/src/libsyntax/parse/lexer/unicode_chars.rs
@@ -3,7 +3,7 @@
 
 use super::StringReader;
 use errors::{Applicability, DiagnosticBuilder};
-use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
+use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION, symbol::kw};
 use crate::parse::token;
 
 #[rustfmt::skip] // for line breaks
@@ -298,9 +298,13 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[
     ('>', "Fullwidth Greater-Than Sign", '>'),
 ];
 
+// FIXME: the lexer could be used to turn unicode homoglyphs into their ASCII version, instead of
+// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`.
+// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add
+// fancier error recovery to it, as there will be less overall work to do this way.
 const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
     (' ', "Space", Some(token::Whitespace)),
-    ('_', "Underscore", None),
+    ('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
     ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
     (',', "Comma", Some(token::Comma)),
     (';', "Semicolon", Some(token::Semi)),
@@ -308,8 +312,6 @@ const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
     ('!', "Exclamation Mark", Some(token::Not)),
     ('?', "Question Mark", Some(token::Question)),
     ('.', "Period", Some(token::Dot)),
-    ('\'', "Single Quote", None),  // Literals are already lexed by this point, so we can't recover
-    ('"', "Quotation Mark", None), // gracefully just by spitting the correct token out.
     ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))),
     (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))),
     ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))),
@@ -324,6 +326,10 @@ const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
     ('<', "Less-Than Sign", Some(token::Lt)),
     ('=', "Equals Sign", Some(token::Eq)),
     ('>', "Greater-Than Sign", Some(token::Gt)),
+    // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
+    // spitting the correct token out.
+    ('\'', "Single Quote", None),
+    ('"', "Quotation Mark", None),
 ];
 
 crate fn check_for_substitution<'a>(
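
For context, here is a minimal, self-contained sketch of the table-driven lookup that `ASCII_ARRAY` supports after this change. It is not the actual rustc code: `TokenKind`, `lookup_substitution`, and the table entries below are simplified stand-ins. The idea is that each entry maps an ASCII character to a human-readable name for the diagnostic plus, where possible, a substitution token that lets parsing continue, while entries like the quotes stay `None` because literals are already lexed by that point.

// Minimal sketch of the table-driven substitution lookup
// (simplified stand-in types, not the real rustc API).

#[derive(Debug, Clone, Copy)]
enum TokenKind {
    Underscore,
    Comma,
    Semi,
}

// Each entry: ASCII character, human-readable name, optional recovery token.
const ASCII_ARRAY: &[(char, &str, Option<TokenKind>)] = &[
    ('_', "Underscore", Some(TokenKind::Underscore)),
    (',', "Comma", Some(TokenKind::Comma)),
    (';', "Semicolon", Some(TokenKind::Semi)),
    // Literals are already lexed by this point, so quotes cannot be
    // recovered just by substituting a token.
    ('\'', "Single Quote", None),
];

/// Given the ASCII counterpart of a confusable character, return the name
/// used in the diagnostic and, if any, the token to continue parsing with.
fn lookup_substitution(ascii: char) -> Option<(&'static str, Option<TokenKind>)> {
    ASCII_ARRAY
        .iter()
        .find(|&&(c, _, _)| c == ascii)
        .map(|&(_, name, tok)| (name, tok))
}

fn main() {
    // E.g. a fullwidth low line looks like '_' but is a different character;
    // its ASCII counterpart '_' is what we look up here.
    match lookup_substitution('_') {
        Some((name, Some(tok))) => {
            println!("suggest replacing with {} `_`; recover as {:?}", name, tok);
        }
        Some((name, None)) => {
            println!("suggest replacing with {} `_`; no token to recover with", name);
        }
        None => println!("no known substitution"),
    }
}

In the actual `check_for_substitution`, the offending character is first matched against `UNICODE_ARRAY` to find its ASCII counterpart; the `Option<TokenKind>` from `ASCII_ARRAY` then serves both the suggestion emitted via `DiagnosticBuilder` and the recovery token handed back to the lexer, which is why this commit can fill in `token::Ident(kw::Underscore, false)` for `'_'` instead of leaving it as `None`.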