| author | bors <bors@rust-lang.org> | 2019-07-26 16:57:54 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2019-07-26 16:57:54 +0000 |
| commit | c43753f910aae000f8bcb0a502407ea332afc74b | |
| tree | ad563761b27efd2cbab0ade95c5139aaccb93494 /src/libsyntax/parse | |
| parent | 1a563362865e6051d4c350544131228e8eff5138 | |
| parent | 232d27c306d76d2f973c88b0e0d1883aac8717f4 | |
Auto merge of #63015 - Centril:rollup-ydhpcas, r=Centril
Rollup of 22 pull requests

Successful merges:

- #62084 (allow clippy::unreadable_literal in unicode tables)
- #62120 (Add missing type links in documentation)
- #62310 (Add missing doc links in boxed module)
- #62421 (Introduce `as_deref` to Option)
- #62583 (Implement Unpin for all raw pointers)
- #62692 (rustc: precompute the largest Niche and store it in LayoutDetails.)
- #62801 (Remove support for -Zlower-128bit-ops)
- #62828 (Remove vector fadd/fmul reduction workarounds)
- #62862 (code cleanup)
- #62904 (Disable d32 on armv6 hf targets)
- #62907 (Initialize the MSP430 AsmParser)
- #62956 (Implement slow-path for FirstSets::first)
- #62963 (Allow lexer to recover from some homoglyphs)
- #62964 (clarify and unify some type test names)
- #62970 (ci: gate toolstate repo pushes on the TOOLSTATE_PUBLISH envvar)
- #62980 (std: Add more accessors for `Metadata` on Windows)
- #62983 (Remove needless indirection through Rc)
- #62985 (librustc_errors: Support ui-testing flag in annotate-snippet emitter)
- #63002 (error_index_generator should output stdout/stderr when it panics.)
- #63004 (Add test for issue-54062)
- #63007 (ci: debug network failures while downloading awscli from PyPI)
- #63009 (Remove redundant `mut` from variable declaration.)

Failed merges:

r? @ghost
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 14 |
| -rw-r--r-- | src/libsyntax/parse/lexer/unicode_chars.rs | 73 |
2 files changed, 52 insertions, 35 deletions
```diff
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index b97801a50d4..52f65e1b474 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -389,8 +389,18 @@ impl<'a> StringReader<'a> {
                     self.pos,
                     "unknown start of token",
                     c);
-                unicode_chars::check_for_substitution(self, start, c, &mut err);
-                return Err(err)
+                // FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs,
+                // instead of keeping a table in `check_for_substitution` into the token. Ideally,
+                // this should be inside `rustc_lexer`. However, we should first remove compound
+                // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
+                // as there will be less overall work to do this way.
+                return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
+                    Some(token) => {
+                        err.emit();
+                        Ok(token)
+                    }
+                    None => Err(err),
+                }
             }
         };
         Ok(kind)
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
index b728a9e1988..eaa736c6a35 100644
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ b/src/libsyntax/parse/lexer/unicode_chars.rs
@@ -3,7 +3,8 @@
 
 use super::StringReader;
 use errors::{Applicability, DiagnosticBuilder};
-use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
+use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION, symbol::kw};
+use crate::parse::token;
 
 #[rustfmt::skip] // for line breaks
 const UNICODE_ARRAY: &[(char, &str, char)] = &[
@@ -297,32 +298,38 @@ const UNICODE_ARRAY: &[(char, &str, char)] = &[
     ('＞', "Fullwidth Greater-Than Sign", '>'),
 ];
 
-const ASCII_ARRAY: &[(char, &str)] = &[
-    (' ', "Space"),
-    ('_', "Underscore"),
-    ('-', "Minus/Hyphen"),
-    (',', "Comma"),
-    (';', "Semicolon"),
-    (':', "Colon"),
-    ('!', "Exclamation Mark"),
-    ('?', "Question Mark"),
-    ('.', "Period"),
-    ('\'', "Single Quote"),
-    ('"', "Quotation Mark"),
-    ('(', "Left Parenthesis"),
-    (')', "Right Parenthesis"),
-    ('[', "Left Square Bracket"),
-    (']', "Right Square Bracket"),
-    ('{', "Left Curly Brace"),
-    ('}', "Right Curly Brace"),
-    ('*', "Asterisk"),
-    ('/', "Slash"),
-    ('\\', "Backslash"),
-    ('&', "Ampersand"),
-    ('+', "Plus Sign"),
-    ('<', "Less-Than Sign"),
-    ('=', "Equals Sign"),
-    ('>', "Greater-Than Sign"),
+// FIXME: the lexer could be used to turn the ASCII version of unicode homoglyphs, instead of
+// keeping the substitution token in this table. Ideally, this should be inside `rustc_lexer`.
+// However, we should first remove compound tokens like `<<` from `rustc_lexer`, and then add
+// fancier error recovery to it, as there will be less overall work to do this way.
+const ASCII_ARRAY: &[(char, &str, Option<token::TokenKind>)] = &[
+    (' ', "Space", Some(token::Whitespace)),
+    ('_', "Underscore", Some(token::Ident(kw::Underscore, false))),
+    ('-', "Minus/Hyphen", Some(token::BinOp(token::Minus))),
+    (',', "Comma", Some(token::Comma)),
+    (';', "Semicolon", Some(token::Semi)),
+    (':', "Colon", Some(token::Colon)),
+    ('!', "Exclamation Mark", Some(token::Not)),
+    ('?', "Question Mark", Some(token::Question)),
+    ('.', "Period", Some(token::Dot)),
+    ('(', "Left Parenthesis", Some(token::OpenDelim(token::Paren))),
+    (')', "Right Parenthesis", Some(token::CloseDelim(token::Paren))),
+    ('[', "Left Square Bracket", Some(token::OpenDelim(token::Bracket))),
+    (']', "Right Square Bracket", Some(token::CloseDelim(token::Bracket))),
+    ('{', "Left Curly Brace", Some(token::OpenDelim(token::Brace))),
+    ('}', "Right Curly Brace", Some(token::CloseDelim(token::Brace))),
+    ('*', "Asterisk", Some(token::BinOp(token::Star))),
+    ('/', "Slash", Some(token::BinOp(token::Slash))),
+    ('\\', "Backslash", None),
+    ('&', "Ampersand", Some(token::BinOp(token::And))),
+    ('+', "Plus Sign", Some(token::BinOp(token::Plus))),
+    ('<', "Less-Than Sign", Some(token::Lt)),
+    ('=', "Equals Sign", Some(token::Eq)),
+    ('>', "Greater-Than Sign", Some(token::Gt)),
+    // FIXME: Literals are already lexed by this point, so we can't recover gracefully just by
+    // spitting the correct token out.
+    ('\'', "Single Quote", None),
+    ('"', "Quotation Mark", None),
 ];
 
 crate fn check_for_substitution<'a>(
@@ -330,20 +337,20 @@ crate fn check_for_substitution<'a>(
     pos: BytePos,
     ch: char,
     err: &mut DiagnosticBuilder<'a>,
-) -> bool {
+) -> Option<token::TokenKind> {
     let (u_name, ascii_char) = match UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch) {
         Some(&(_u_char, u_name, ascii_char)) => (u_name, ascii_char),
-        None => return false,
+        None => return None,
     };
 
     let span = Span::new(pos, pos + Pos::from_usize(ch.len_utf8()), NO_EXPANSION);
 
-    let ascii_name = match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) {
-        Some((_ascii_char, ascii_name)) => ascii_name,
+    let (ascii_name, token) = match ASCII_ARRAY.iter().find(|&&(c, _, _)| c == ascii_char) {
+        Some((_ascii_char, ascii_name, token)) => (ascii_name, token),
         None => {
             let msg = format!("substitution character not found for '{}'", ch);
             reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
-            return false;
+            return None;
         }
     };
 
@@ -371,7 +378,7 @@
         );
         err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect);
     }
-    true
+    token.clone()
 }
 
 /// Extract string if found at current position with given delimiters
```
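Net effect of the two files: `check_for_substitution` now reports which `TokenKind` the confusable character should be read as (or `None` when no safe substitution exists, e.g. for quotes and backslash), and the lexer emits the diagnostic and carries on with that token instead of returning `Err`. The sketch below restates that two-step lookup in a self-contained form; the stand-in `TokenKind` enum, the trimmed-down tables, and the helper name `substitution_token` are illustrative inventions, not the actual rustc items.

```rust
// Standalone sketch of the recovery scheme in this patch, with simplified stand-in
// tables and a local TokenKind enum; entries and names are illustrative.

#[derive(Clone, Debug, PartialEq)]
enum TokenKind {
    Gt,
    Semi,
}

// Homoglyph -> (human-readable name, ASCII counterpart), like UNICODE_ARRAY.
const UNICODE_ARRAY: &[(char, &str, char)] = &[
    ('＞', "Fullwidth Greater-Than Sign", '>'),
    ('；', "Fullwidth Semicolon", ';'),
];

// ASCII char -> (name, substitute token), like the new three-column ASCII_ARRAY.
// None means "we can name the character but cannot recover by substituting a token".
const ASCII_ARRAY: &[(char, &str, Option<TokenKind>)] = &[
    ('>', "Greater-Than Sign", Some(TokenKind::Gt)),
    (';', "Semicolon", Some(TokenKind::Semi)),
    ('"', "Quotation Mark", None),
];

// Mirrors the new Option<TokenKind> return value of check_for_substitution: the
// caller emits the diagnostic and, when Some(token) comes back, keeps lexing with
// that token instead of bailing out with Err.
fn substitution_token(ch: char) -> Option<TokenKind> {
    // First map the homoglyph to its ASCII counterpart...
    let &(_, _, ascii) = UNICODE_ARRAY.iter().find(|&&(c, _, _)| c == ch)?;
    // ...then look up the token to substitute, if any.
    ASCII_ARRAY
        .iter()
        .find(|&&(c, _, _)| c == ascii)
        .and_then(|(_, _, token)| token.clone())
}

fn main() {
    // A fullwidth '＞' can be recovered as the ASCII `>` token.
    assert_eq!(substitution_token('＞'), Some(TokenKind::Gt));
    // Characters with no known ASCII counterpart stay on the fatal-error path.
    assert_eq!(substitution_token('☃'), None);
}
```

Quote characters map to `None` in the real table because, as the FIXME in the patch notes, literals are already lexed by the time this check runs, so handing back a single token cannot recover them.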