about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Charles Lew <crlf0710@gmail.com> 2020-05-01 12:34:04 +0800
committer Charles Lew <crlf0710@gmail.com> 2020-05-03 02:31:13 +0800
commit f3ec00a5fecabe5587bdd2f7edee86a755b1f52b (patch)
tree 059f96d14e65ba3dd2474eb4013f61b118dfb3b7 /src
parent c05961c2db4b02f599c8509031818339e10d592a (diff)
download rust-f3ec00a5fecabe5587bdd2f7edee86a755b1f52b.tar.gz
rust-f3ec00a5fecabe5587bdd2f7edee86a755b1f52b.zip
Add a fast code path to optimize `confusable_idents` lint for ASCII code base.
Diffstat (limited to 'src')
-rw-r--r-- src/librustc_lint/non_ascii_idents.rs 53
1 files changed, 49 insertions, 4 deletions
diff --git a/src/librustc_lint/non_ascii_idents.rs b/src/librustc_lint/non_ascii_idents.rs
index 522f292dc94..94fc1a228df 100644
--- a/src/librustc_lint/non_ascii_idents.rs
+++ b/src/librustc_lint/non_ascii_idents.rs
@@ -2,6 +2,8 @@ use crate::{EarlyContext, EarlyLintPass, LintContext};
 use rustc_ast::ast;
 use rustc_data_structures::fx::FxHashMap;
 use rustc_span::symbol::SymbolStr;
+use std::hash::{Hash, Hasher};
+use std::ops::Deref;
 
 declare_lint! {
     pub NON_ASCII_IDENTS,
@@ -24,9 +26,6 @@ declare_lint! {
 
 declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
 
-use std::hash::{Hash, Hasher};
-use std::ops::Deref;
-
 enum CowBoxSymStr {
     Interned(SymbolStr),
     Owned(Box<str>),
@@ -73,6 +72,35 @@ fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr
     }
 }
 
+fn is_in_ascii_confusable_closure(c: char) -> bool {
+    // FIXME: move this table to `unicode_security` crate.
+    // data here corresponds to Unicode 13.
+    const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)];
+    let c = c as u64;
+    for &(range_start, range_end) in ASCII_CONFUSABLE_CLOSURE {
+        if c >= range_start && c <= range_end {
+            return true;
+        }
+    }
+    false
+}
+
+fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool {
+    // FIXME: move this table to `unicode_security` crate.
+    // data here corresponds to Unicode 13.
+    const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[
+        0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba,
+        0x2080,
+    ];
+    let c = c as u64;
+    for &item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST {
+        if c == item {
+            return true;
+        }
+    }
+    false
+}
+
 impl EarlyLintPass for NonAsciiIdents {
     fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
         use rustc_session::lint::Level;
@@ -81,9 +109,26 @@ impl EarlyLintPass for NonAsciiIdents {
         }
         let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
         let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len());
+        let mut in_fast_path = true;
         for (symbol, sp) in symbols.iter() {
+            // fast path
             let symbol_str = symbol.as_str();
-            symbol_strs_and_spans.push((symbol_str, *sp));
+            if !symbol_str.chars().all(is_in_ascii_confusable_closure) {
+                // fallback to slow path.
+                symbol_strs_and_spans.clear();
+                in_fast_path = false;
+                break;
+            }
+            if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) {
+                symbol_strs_and_spans.push((symbol_str, *sp));
+            }
+        }
+        if !in_fast_path {
+            // slow path
+            for (symbol, sp) in symbols.iter() {
+                let symbol_str = symbol.as_str();
+                symbol_strs_and_spans.push((symbol_str, *sp));
+            }
         }
         drop(symbols);
         symbol_strs_and_spans.sort_by_key(|x| x.0.clone());