diff options
| author | Charles Lew <crlf0710@gmail.com> | 2020-05-01 12:34:04 +0800 |
|---|---|---|
| committer | Charles Lew <crlf0710@gmail.com> | 2020-05-03 02:31:13 +0800 |
| commit | f3ec00a5fecabe5587bdd2f7edee86a755b1f52b (patch) | |
| tree | 059f96d14e65ba3dd2474eb4013f61b118dfb3b7 /src | |
| parent | c05961c2db4b02f599c8509031818339e10d592a (diff) | |
| download | rust-f3ec00a5fecabe5587bdd2f7edee86a755b1f52b.tar.gz rust-f3ec00a5fecabe5587bdd2f7edee86a755b1f52b.zip | |
Add a fast code path to optimize the `confusable_idents` lint for ASCII-only code bases.
Diffstat (limited to 'src')
| -rw-r--r-- | src/librustc_lint/non_ascii_idents.rs | 53 |
1 file changed, 49 insertions, 4 deletions
diff --git a/src/librustc_lint/non_ascii_idents.rs b/src/librustc_lint/non_ascii_idents.rs index 522f292dc94..94fc1a228df 100644 --- a/src/librustc_lint/non_ascii_idents.rs +++ b/src/librustc_lint/non_ascii_idents.rs @@ -2,6 +2,8 @@ use crate::{EarlyContext, EarlyLintPass, LintContext}; use rustc_ast::ast; use rustc_data_structures::fx::FxHashMap; use rustc_span::symbol::SymbolStr; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; declare_lint! { pub NON_ASCII_IDENTS, @@ -24,9 +26,6 @@ declare_lint! { declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]); -use std::hash::{Hash, Hasher}; -use std::ops::Deref; - enum CowBoxSymStr { Interned(SymbolStr), Owned(Box<str>), @@ -73,6 +72,35 @@ fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr } } +fn is_in_ascii_confusable_closure(c: char) -> bool { + // FIXME: move this table to `unicode_security` crate. + // data here corresponds to Unicode 13. + const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)]; + let c = c as u64; + for &(range_start, range_end) in ASCII_CONFUSABLE_CLOSURE { + if c >= range_start && c <= range_end { + return true; + } + } + false +} + +fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool { + // FIXME: move this table to `unicode_security` crate. + // data here corresponds to Unicode 13. 
+ const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[ + 0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba, + 0x2080, + ]; + let c = c as u64; + for &item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST { + if c == item { + return true; + } + } + false +} + impl EarlyLintPass for NonAsciiIdents { fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) { use rustc_session::lint::Level; @@ -81,9 +109,26 @@ impl EarlyLintPass for NonAsciiIdents { } let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock(); let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len()); + let mut in_fast_path = true; for (symbol, sp) in symbols.iter() { + // fast path let symbol_str = symbol.as_str(); - symbol_strs_and_spans.push((symbol_str, *sp)); + if !symbol_str.chars().all(is_in_ascii_confusable_closure) { + // fallback to slow path. + symbol_strs_and_spans.clear(); + in_fast_path = false; + break; + } + if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) { + symbol_strs_and_spans.push((symbol_str, *sp)); + } + } + if !in_fast_path { + // slow path + for (symbol, sp) in symbols.iter() { + let symbol_str = symbol.as_str(); + symbol_strs_and_spans.push((symbol_str, *sp)); + } } drop(symbols); symbol_strs_and_spans.sort_by_key(|x| x.0.clone()); |
