about summary refs log tree commit diff
diff options
context:
space:
mode:
author Igor Aleksanov <popzxc@yandex.ru> 2021-06-30 19:06:33 +0300
committer Igor Aleksanov <popzxc@yandex.ru> 2021-06-30 19:06:33 +0300
commit 018be41deedd086191b8ce45895164e0aa7046b0 (patch)
tree e6d638f23778deb36c92e1c1efc357a80bc0decc
parent b286b38a295d6d68361f463cdeaf7536b051ddb2 (diff)
download rust-018be41deedd086191b8ce45895164e0aa7046b0.tar.gz
rust-018be41deedd086191b8ce45895164e0aa7046b0.zip
Implement 'disallowed_script_idents' lint
-rw-r--r-- CHANGELOG.md 1
-rw-r--r-- clippy_lints/Cargo.toml 1
-rw-r--r-- clippy_lints/src/disallowed_script_idents.rs 112
-rw-r--r-- clippy_lints/src/lib.rs 6
-rw-r--r-- clippy_lints/src/utils/conf.rs 2
-rw-r--r-- tests/ui-toml/toml_unknown_key/conf_unknown_key.stderr 2
-rw-r--r-- tests/ui/disallowed_script_idents.rs 10
-rw-r--r-- tests/ui/disallowed_script_idents.stderr 20
8 files changed, 152 insertions, 2 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f62a172035..f3a80703238 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2487,6 +2487,7 @@ Released 2018-09-13
 [`derive_hash_xor_eq`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_hash_xor_eq
 [`derive_ord_xor_partial_ord`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_ord_xor_partial_ord
 [`disallowed_method`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_method
+[`disallowed_script_idents`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_script_idents
 [`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type
 [`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression
 [`doc_markdown`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_markdown
diff --git a/clippy_lints/Cargo.toml b/clippy_lints/Cargo.toml
index d3d12062f07..42cf7547f51 100644
--- a/clippy_lints/Cargo.toml
+++ b/clippy_lints/Cargo.toml
@@ -23,6 +23,7 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = { version = "1.0", optional = true }
 toml = "0.5.3"
 unicode-normalization = "0.1"
+unicode-script = { version = "0.5.3", default-features = false }
 semver = "0.11"
 rustc-semver = "1.1.0"
 # NOTE: cargo requires serde feat in its url dep
diff --git a/clippy_lints/src/disallowed_script_idents.rs b/clippy_lints/src/disallowed_script_idents.rs
new file mode 100644
index 00000000000..12c525634c5
--- /dev/null
+++ b/clippy_lints/src/disallowed_script_idents.rs
@@ -0,0 +1,112 @@
+use clippy_utils::diagnostics::span_lint;
+use rustc_ast::ast;
+use rustc_data_structures::fx::FxHashSet;
+use rustc_lint::{EarlyContext, EarlyLintPass, Level};
+use rustc_session::{declare_tool_lint, impl_lint_pass};
+use unicode_script::{Script, UnicodeScript};
+
+declare_clippy_lint! {
+    /// **What it does:** Checks for usage of unicode scripts other than those explicitly allowed
+    /// by the lint config.
+    ///
+    /// This lint doesn't take into account non-text scripts such as `Unknown` and `Linear_A`.
+    /// It also ignores the `Common` script type.
+    /// While configuring, be sure to use official script name [aliases] from
+    /// [the list of supported scripts][supported_scripts].
+    ///
+    /// See also: [`non_ascii_idents`].
+    ///
+    /// [aliases]: http://www.unicode.org/reports/tr24/tr24-31.html#Script_Value_Aliases
+    /// [supported_scripts]: https://www.unicode.org/iso15924/iso15924-codes.html
+    ///
+    /// **Why is this bad?** It may not be desirable to have many different scripts for
+    /// identifiers in the codebase.
+    ///
+    /// Note that if you only want to allow plain English, you might want to use
+    /// the built-in [`non_ascii_idents`] lint instead.
+    ///
+    /// [`non_ascii_idents`]: https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html#non-ascii-idents
+    ///
+    /// **Known problems:** None.
+    ///
+    /// **Example:**
+    /// ```rust
+    /// // Assuming that `clippy.toml` contains the following line:
+    /// // allowed-scripts = ["Latin", "Cyrillic"]
+    /// let counter = 10; // OK, latin is allowed.
+    /// let счётчик = 10; // OK, cyrillic is allowed.
+    /// let zähler = 10; // OK, it's still latin.
+    /// let カウンタ = 10; // Will spawn the lint.
+    /// ```
+    pub DISALLOWED_SCRIPT_IDENTS,
+    restriction,
+    "usage of non-allowed Unicode scripts"
+}
+
+#[derive(Clone, Debug)]
+pub struct DisallowedScriptIdents {
+    whitelist: FxHashSet<Script>,
+}
+
+impl DisallowedScriptIdents {
+    pub fn new(whitelist: &[String]) -> Self {
+        let whitelist = whitelist
+            .iter()
+            .map(String::as_str)
+            .filter_map(Script::from_full_name)
+            .collect();
+        Self { whitelist }
+    }
+}
+
+impl_lint_pass!(DisallowedScriptIdents => [DISALLOWED_SCRIPT_IDENTS]);
+
+impl EarlyLintPass for DisallowedScriptIdents {
+    fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
+        // Implementation is heavily inspired by the implementation of [`non_ascii_idents`] lint:
+        // https://github.com/rust-lang/rust/blob/master/compiler/rustc_lint/src/non_ascii_idents.rs
+
+        let check_disallowed_script_idents = cx.builder.lint_level(DISALLOWED_SCRIPT_IDENTS).0 != Level::Allow;
+        if !check_disallowed_script_idents {
+            return;
+        }
+
+        let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
+        // Sort by `Span` so that error messages make sense with respect to the
+        // order of identifier locations in the code.
+        let mut symbols: Vec<_> = symbols.iter().collect();
+        symbols.sort_unstable_by_key(|k| k.1);
+
+        for (symbol, &span) in &symbols {
+            // Note: `symbol.as_str()` is an expensive operation, thus should not be called
+            // more than once for a single symbol.
+            let symbol_str = symbol.as_str();
+            if symbol_str.is_ascii() {
+                continue;
+            }
+
+            for c in symbol_str.chars() {
+                // We iterate through all the scripts associated with this character
+                // and report the first one that is not in the whitelist.
+                let forbidden_script = c
+                    .script_extension()
+                    .iter()
+                    .find(|script| !self.whitelist.contains(script));
+                if let Some(script) = forbidden_script {
+                    span_lint(
+                        cx,
+                        DISALLOWED_SCRIPT_IDENTS,
+                        span,
+                        &format!(
+                            "identifier `{}` has a Unicode script that is not allowed by configuration: {}",
+                            symbol_str,
+                            script.full_name()
+                        ),
+                    );
+                    // We don't want to spawn the warning multiple times over a single identifier.
+                    break;
+                }
+            }
+        }
+    }
+}
diff --git a/clippy_lints/src/lib.rs b/clippy_lints/src/lib.rs
index 9cffeeb0224..0eb824dabf3 100644
--- a/clippy_lints/src/lib.rs
+++ b/clippy_lints/src/lib.rs
@@ -187,6 +187,7 @@ mod default_numeric_fallback;
 mod dereference;
 mod derive;
 mod disallowed_method;
+mod disallowed_script_idents;
 mod disallowed_type;
 mod doc;
 mod double_comparison;
@@ -585,6 +586,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
         derive::EXPL_IMPL_CLONE_ON_COPY,
         derive::UNSAFE_DERIVE_DESERIALIZE,
         disallowed_method::DISALLOWED_METHOD,
+        disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS,
         disallowed_type::DISALLOWED_TYPE,
         doc::DOC_MARKDOWN,
         doc::MISSING_ERRORS_DOC,
@@ -995,6 +997,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
         LintId::of(create_dir::CREATE_DIR),
         LintId::of(dbg_macro::DBG_MACRO),
         LintId::of(default_numeric_fallback::DEFAULT_NUMERIC_FALLBACK),
+        LintId::of(disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS),
         LintId::of(else_if_without_else::ELSE_IF_WITHOUT_ELSE),
         LintId::of(exhaustive_items::EXHAUSTIVE_ENUMS),
         LintId::of(exhaustive_items::EXHAUSTIVE_STRUCTS),
@@ -2082,7 +2085,8 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
     store.register_late_pass(move || box disallowed_type::DisallowedType::new(&disallowed_types));
     let import_renames = conf.enforced_import_renames.clone();
     store.register_late_pass(move || box missing_enforced_import_rename::ImportRename::new(import_renames.clone()));
-
+    let scripts = conf.allowed_scripts.clone();
+    store.register_early_pass(move || box disallowed_script_idents::DisallowedScriptIdents::new(&scripts));
 }
 
 #[rustfmt::skip]
diff --git a/clippy_lints/src/utils/conf.rs b/clippy_lints/src/utils/conf.rs
index 6fc4998318c..44d3d456342 100644
--- a/clippy_lints/src/utils/conf.rs
+++ b/clippy_lints/src/utils/conf.rs
@@ -212,6 +212,8 @@ define_Conf! {
     (standard_macro_braces: Vec<crate::nonstandard_macro_braces::MacroMatcher> = Vec::new()),
     /// Lint: MISSING_ENFORCED_IMPORT_RENAMES. The list of imports to always rename, a fully qualified path followed by the rename.
     (enforced_import_renames: Vec<crate::utils::conf::Rename> = Vec::new()),
+    /// Lint: DISALLOWED_SCRIPT_IDENTS. The list of unicode scripts allowed to be used in the scope.
+    (allowed_scripts: Vec<String> = vec!["Latin".to_string()]),
 }
 
 /// Search for the configuration file.
diff --git a/tests/ui-toml/toml_unknown_key/conf_unknown_key.stderr b/tests/ui-toml/toml_unknown_key/conf_unknown_key.stderr
index aa7bfa2cc8c..e0029ebeb27 100644
--- a/tests/ui-toml/toml_unknown_key/conf_unknown_key.stderr
+++ b/tests/ui-toml/toml_unknown_key/conf_unknown_key.stderr
@@ -1,4 +1,4 @@
-error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `third-party` at line 5 column 1
+error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `allowed-scripts`, `third-party` at line 5 column 1
 
 error: aborting due to previous error
 
diff --git a/tests/ui/disallowed_script_idents.rs b/tests/ui/disallowed_script_idents.rs
new file mode 100644
index 00000000000..cfdda35971f
--- /dev/null
+++ b/tests/ui/disallowed_script_idents.rs
@@ -0,0 +1,10 @@
+#![deny(clippy::disallowed_script_idents)]
+#![allow(dead_code)]
+
+fn main() {
+    let counter = 10; // OK, latin is allowed.
+    let zähler = 10; // OK, it's still latin.
+
+    let счётчик = 10; // Cyrillic is not allowed by default.
+    let カウンタ = 10; // Same for japanese.
+}
diff --git a/tests/ui/disallowed_script_idents.stderr b/tests/ui/disallowed_script_idents.stderr
new file mode 100644
index 00000000000..cc84dc1d43c
--- /dev/null
+++ b/tests/ui/disallowed_script_idents.stderr
@@ -0,0 +1,20 @@
+error: identifier `счётчик` has a Unicode script that is not allowed by configuration: Cyrillic
+  --> $DIR/disallowed_script_idents.rs:8:9
+   |
+LL |     let счётчик = 10; // Cyrillic is not allowed by default.
+   |         ^^^^^^^
+   |
+note: the lint level is defined here
+  --> $DIR/disallowed_script_idents.rs:1:9
+   |
+LL | #![deny(clippy::disallowed_script_idents)]
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+error: identifier `カウンタ` has a Unicode script that is not allowed by configuration: Katakana
+  --> $DIR/disallowed_script_idents.rs:9:9
+   |
+LL |     let カウンタ = 10; // Same for japanese.
+   |         ^^^^^^^^
+
+error: aborting due to 2 previous errors
+