author     bors <bors@rust-lang.org>    2019-07-26 16:57:54 +0000
committer  bors <bors@rust-lang.org>    2019-07-26 16:57:54 +0000
commit     c43753f910aae000f8bcb0a502407ea332afc74b (patch)
tree       ad563761b27efd2cbab0ade95c5139aaccb93494 /src/libsyntax/parse
parent     1a563362865e6051d4c350544131228e8eff5138 (diff)
parent     232d27c306d76d2f973c88b0e0d1883aac8717f4 (diff)
Auto merge of #63015 - Centril:rollup-ydhpcas, r=Centril
Rollup of 22 pull requests

Successful merges:

 - #62084 (allow clippy::unreadable_literal in unicode tables)
 - #62120 (Add missing type links in documentation)
 - #62310 (Add missing doc links in boxed module)
 - #62421 (Introduce `as_deref` to Option; see the sketch after this message)
 - #62583 (Implement Unpin for all raw pointers)
 - #62692 (rustc: precompute the largest Niche and store it in LayoutDetails.)
 - #62801 (Remove support for -Zlower-128bit-ops)
 - #62828 (Remove vector fadd/fmul reduction workarounds)
 - #62862 (code cleanup)
 - #62904 (Disable d32 on armv6 hf targets)
 - #62907 (Initialize the MSP430 AsmParser)
 - #62956 (Implement slow-path for FirstSets::first)
 - #62963 (Allow lexer to recover from some homoglyphs)
 - #62964 (clarify and unify some type test names)
 - #62970 (ci: gate toolstate repo pushes on the TOOLSTATE_PUBLISH envvar)
 - #62980 (std: Add more accessors for `Metadata` on Windows)
 - #62983 (Remove needless indirection through Rc)
 - #62985 (librustc_errors: Support ui-testing flag in annotate-snippet emitter)
 - #63002 (error_index_generator should output stdout/stderr when it panics.)
 - #63004 (Add test for issue-54062)
 - #63007 (ci: debug network failures while downloading awscli from PyPI)
 - #63009 (Remove redundant `mut` from variable declaration.)

Failed merges:

r? @ghost
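
For reference, a small sketch of the `as_deref` accessor that #62421 adds to `Option`, shown in the form it eventually stabilized in (at the time of this merge it was a newly introduced, unstable method). The `greet` helper is made up purely for the example:

// Sketch of `Option::as_deref`: it turns an `Option<T>` into an
// `Option<&T::Target>` for any `T: Deref`, so an `Option<String>` can be
// passed where an `Option<&str>` is expected, without cloning.
fn greet(name: Option<&str>) -> String {
    format!("hello, {}", name.unwrap_or("world"))
}

fn main() {
    let owned: Option<String> = Some("ferris".to_string());
    // Borrow the inner `String` as `&str`; `owned` remains usable afterwards.
    assert_eq!(greet(owned.as_deref()), "hello, ferris");
    assert_eq!(greet(None), "hello, world");
    println!("{}", greet(owned.as_deref()));
}
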
Diffstat (limited to 'src/libsyntax/parse')
-rw-r--r--  src/libsyntax/parse/lexer/mod.rs            14
-rw-r--r--  src/libsyntax/parse/lexer/unicode_chars.rs  73
2 files changed, 52 insertions(+), 35 deletions(-)
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index b97801a50d4..52f65e1b474 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -389,8 +389,18 @@ impl<'a> StringReader<'a> {
                                                           self.pos,
                                                           "unknown start of token",
                                                           c);
-                unicode_chars::check_for_substitution(self, start, c, &mut err);
-                return Err(err)
+                // FIXME: the lexer could be used to turn the ASCII version of a unicode homoglyph
+                // into the token, instead of keeping a table in `check_for_substitution`. Ideally,
+                // this should be inside `rustc_lexer`. However, we should first remove compound
+                // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
+                // as there will be less overall work to do this way.
+                return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
+                    Some(token) => {
+                        err.emit();
+                        Ok(token)
+                    }
+                    None => Err(err),
+                }
             }
         };
         Ok(kind)
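
The new match arm above emits the diagnostic and returns the substituted token instead of bailing out, so lexing can continue. Below is a minimal standalone sketch of that table-driven recovery; the simplified `TokenKind` enum, the `SUBSTITUTIONS` table, and the `recover_homoglyph` helper are illustrative names only and are not part of rustc:

// Minimal standalone sketch of homoglyph recovery, using a simplified
// `TokenKind` enum rather than the compiler's actual token types.
#[derive(Debug, Clone, PartialEq)]
enum TokenKind {
    Semi,
    Comma,
    Lt,
    Gt,
}

// (unicode char, unicode name, ascii char, recovered token) -- a tiny subset
// of the kind of table kept in `unicode_chars.rs`.
const SUBSTITUTIONS: &[(char, &str, char, Option<TokenKind>)] = &[
    ('；', "Fullwidth Semicolon", ';', Some(TokenKind::Semi)),
    ('，', "Fullwidth Comma", ',', Some(TokenKind::Comma)),
    ('＜', "Fullwidth Less-Than Sign", '<', Some(TokenKind::Lt)),
    ('＞', "Fullwidth Greater-Than Sign", '>', Some(TokenKind::Gt)),
];

/// Returns the token to recover with if `c` is a known homoglyph; a real
/// implementation would also emit a diagnostic carrying a suggestion.
fn recover_homoglyph(c: char) -> Option<TokenKind> {
    SUBSTITUTIONS
        .iter()
        .find(|&&(u, _, _, _)| u == c)
        .and_then(|(_, _, _, tok)| tok.clone())
}

fn main() {
    // '；' (U+FF1B) is recovered as a plain `;` token instead of aborting.
    assert_eq!(recover_homoglyph('；'), Some(TokenKind::Semi));
    // Unknown characters still produce no recovery token.
    assert_eq!(recover_homoglyph('☃'), None);
    println!("recovery sketch ok");
}
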
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
index b728a9e1988..eaa736c6a35 100644
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ b/src/libsyntax/parse/lexer/unicode_chars.rs
@@ -3,7 +3,8 @@
 
 use super::StringReader;
 use errors::{Applicability, DiagnosticBuilder};
-use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
+use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION, symbol::kw};
+use crate::parse::token;
 
 #[rustfmt::skip] // for line breaks
 const UNICODE_ARRAY: &[(char, &str, char)] = &[
@@ -297,32 +298,38 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[
     ('>', "Fullwidth Greater-Than Sign", '>'),
 ];
 
-const ASCII_ARRAY: &[(char, &str)] = &[
-    (' ', "Space"),
-    ('_', "Underscore"),
-    ('-', "Minus/Hyphen"),
-    (',', "Comma"),
-    (';', "Semicolon"),
-    (':', "Colon"),
-    ('!', "Exclamation Mark"),
-    ('?', "Question Mark"),
-    ('.', "Period"),
-    ('\'', "Single Quote"),
-    ('"', "Quotation Mark"),
-    ('(', "Left Parenthesis"),
-    (')', "Right Parenthesis"),
-    ('[', "Left Square Bracket"),
-    (']', "Right Square Bracket"),
-    ('{', "Left Curly Brace"),
-    ('}', "Right Curly Brace"),
-    ('*', "Asterisk"),
-    ('/', "Slash"),
-    ('\\', "Backslash"),
-    ('&', "Ampersand"),
-    ('+', "Plus Sign"),
-    ('<', "Less-Than Sign"),
-    ('=', "Equals Sign"),
-    ('>', "Greater-Than Sign"),
+// FIXME: the lexer could be used to turn the ASCII version of a unicode homoglyph into the token,
+// instead of keeping the substitution token in this table. Ideally, this should be inside
+// `rustc_lexer`. However, we should first remove compound tokens like `<<` from `rustc_lexer`,
+// and then add fancier error recovery to it, as there will be less overall work to do this way.
+const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
+    (' ', "Space", Some(token::Whitespace)),
+    ('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
+    ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
+    (',', "Comma", Some(token::Comma)),
+    (';', "Semicolon", Some(token::Semi)),
+    (':', "Colon", Some(token::Colon)),
+    ('!', "Exclamation Mark", Some(token::Not)),
+    ('?', "Question Mark", Some(token::Question)),
+    ('.', "Period", Some(token::Dot)),
+    ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))),
+    (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))),
+    ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))),
+    (']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))),
+    ('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))),
+    ('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))),
+    ('*', "Asterisk", Some(token::BinOp(token::Star))),
+    ('/', "Slash", Some(token::BinOp(token::Slash))),
+    ('\\', "Backslash", None),
+    ('&', "Ampersand", Some(token::BinOp(token::And))),
+    ('+', "Plus Sign", Some(token::BinOp(token::Plus))),
+    ('<', "Less-Than Sign", Some(token::Lt)),
+    ('=', "Equals Sign", Some(token::Eq)),
+    ('>', "Greater-Than Sign", Some(token::Gt)),
+    // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
+    // spitting the correct token out.
+    ('\'', "Single Quote", None),
+    ('"', "Quotation Mark", None),
 ];
 
 crate fn check_for_substitution<'a>(
@@ -330,20 +337,20 @@ crate fn check_for_substitution<'a>(
     pos: BytePos,
     ch: char,
     err: &mut DiagnosticBuilder<'a>,
-) -> bool {
+) -> Option<token::TokenKind> {
     let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) {
         Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char),
-        None => return false,
+        None => return None,
     };
 
     let span = Span::new(pos, pos + Pos::from_usize(ch.len_utf8()), NO_EXPANSION);
 
-    let ascii_name = match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) {
-        Some((_ascii_char, ascii_name)) => ascii_name,
+    let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) {
+        Some((_ascii_char, ascii_name, token)) => (ascii_name, token),
         None => {
             let msg = format!("substitution character not found for '{}'", ch);
             reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
-            return false;
+            return None;
         }
     };
 
@@ -371,7 +378,7 @@ crate fn check_for_substitution<'a>(
         );
         err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect);
     }
-    true
+    token.clone()
 }
 
 /// Extract string if found at current position with given delimiters
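
The suggestion span built in `check_for_substitution` covers the full UTF-8 width of the offending character (`ch.len_utf8()`), which is usually wider than the single-byte ASCII character suggested in its place. A standalone check of that arithmetic, using plain `usize` offsets in place of the compiler's `BytePos`/`Span` types (the `suggestion_span` helper is made up for the example):

// Plain-usize stand-in for the `Span::new(pos, pos + Pos::from_usize(ch.len_utf8()), ..)`
// computation: the replacement span must cover the homoglyph's full UTF-8 width.
fn suggestion_span(pos: usize, ch: char) -> (usize, usize) {
    (pos, pos + ch.len_utf8())
}

fn main() {
    // '；' (Fullwidth Semicolon, U+FF1B) occupies three bytes in UTF-8 ...
    assert_eq!('；'.len_utf8(), 3);
    assert_eq!(suggestion_span(10, '；'), (10, 13));
    // ... while the suggested ASCII ';' occupies a single byte.
    assert_eq!(';'.len_utf8(), 1);
    println!("span arithmetic checks out");
}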