diff options
| author | Igor Aleksanov <popzxc@yandex.ru> | 2021-06-30 19:06:33 +0300 |
|---|---|---|
| committer | Igor Aleksanov <popzxc@yandex.ru> | 2021-06-30 19:06:33 +0300 |
| commit | 018be41deedd086191b8ce45895164e0aa7046b0 (patch) | |
| tree | e6d638f23778deb36c92e1c1efc357a80bc0decc | |
| parent | b286b38a295d6d68361f463cdeaf7536b051ddb2 (diff) | |
| download | rust-018be41deedd086191b8ce45895164e0aa7046b0.tar.gz rust-018be41deedd086191b8ce45895164e0aa7046b0.zip | |
Implement 'disallowed_script_idents' lint
| -rw-r--r-- | CHANGELOG.md | 1 | ||||
| -rw-r--r-- | clippy_lints/Cargo.toml | 1 | ||||
| -rw-r--r-- | clippy_lints/src/disallowed_script_idents.rs | 112 | ||||
| -rw-r--r-- | clippy_lints/src/lib.rs | 6 | ||||
| -rw-r--r-- | clippy_lints/src/utils/conf.rs | 2 | ||||
| -rw-r--r-- | tests/ui-toml/toml_unknown_key/conf_unknown_key.stderr | 2 | ||||
| -rw-r--r-- | tests/ui/disallowed_script_idents.rs | 10 | ||||
| -rw-r--r-- | tests/ui/disallowed_script_idents.stderr | 20 |
8 files changed, 152 insertions, 2 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f62a172035..f3a80703238 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2487,6 +2487,7 @@ Released 2018-09-13 [`derive_hash_xor_eq`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_hash_xor_eq [`derive_ord_xor_partial_ord`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_ord_xor_partial_ord [`disallowed_method`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_method +[`disallowed_script_idents`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_script_idents [`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type [`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression [`doc_markdown`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_markdown diff --git a/clippy_lints/Cargo.toml b/clippy_lints/Cargo.toml index d3d12062f07..42cf7547f51 100644 --- a/clippy_lints/Cargo.toml +++ b/clippy_lints/Cargo.toml @@ -23,6 +23,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0", optional = true } toml = "0.5.3" unicode-normalization = "0.1" +unicode-script = { version = "0.5.3", default-features = false } semver = "0.11" rustc-semver = "1.1.0" # NOTE: cargo requires serde feat in its url dep diff --git a/clippy_lints/src/disallowed_script_idents.rs b/clippy_lints/src/disallowed_script_idents.rs new file mode 100644 index 00000000000..12c525634c5 --- /dev/null +++ b/clippy_lints/src/disallowed_script_idents.rs @@ -0,0 +1,112 @@ +use clippy_utils::diagnostics::span_lint; +use rustc_ast::ast; +use rustc_data_structures::fx::FxHashSet; +use rustc_lint::{EarlyContext, EarlyLintPass, Level}; +use rustc_session::{declare_tool_lint, impl_lint_pass}; +use unicode_script::{Script, UnicodeScript}; + +declare_clippy_lint! 
{ + /// **What it does:** Checks for usage of unicode scripts other than those explicitly allowed + /// by the lint config. + /// + /// This lint doesn't take into account non-text scripts such as `Unknown` and `Linear_A`. + /// It also ignores the `Common` script type. + /// While configuring, be sure to use official script name [aliases] from + /// [the list of supported scripts][supported_scripts]. + /// + /// See also: [`non_ascii_idents`]. + /// + /// [aliases]: http://www.unicode.org/reports/tr24/tr24-31.html#Script_Value_Aliases + /// [supported_scripts]: https://www.unicode.org/iso15924/iso15924-codes.html + /// + /// **Why is this bad?** It may not be desired to have many different scripts for + /// identifiers in the codebase. + /// + /// Note that if you only want to allow plain English, you might want to use + /// built-in [`non_ascii_idents`] lint instead. + /// + /// [`non_ascii_idents`]: https://doc.rust-lang.org/rustc/lints/listing/allowed-by-default.html#non-ascii-idents + /// + /// **Known problems:** None. + /// + /// **Example:** + /// ```rust + /// // Assuming that `clippy.toml` contains the following line: + /// // allowed-scripts = ["Latin", "Cyrillic"] + /// let counter = 10; // OK, latin is allowed. + /// let счётчик = 10; // OK, cyrillic is allowed. + /// let zähler = 10; // OK, it's still latin. + /// let カウンタ = 10; // Will spawn the lint. 
+ /// ``` + pub DISALLOWED_SCRIPT_IDENTS, + restriction, + "usage of non-allowed Unicode scripts" +} + +#[derive(Clone, Debug)] +pub struct DisallowedScriptIdents { + whitelist: FxHashSet<Script>, +} + +impl DisallowedScriptIdents { + pub fn new(whitelist: &[String]) -> Self { + let whitelist = whitelist + .iter() + .map(String::as_str) + .filter_map(Script::from_full_name) + .collect(); + Self { whitelist } + } +} + +impl_lint_pass!(DisallowedScriptIdents => [DISALLOWED_SCRIPT_IDENTS]); + +impl EarlyLintPass for DisallowedScriptIdents { + fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) { + // Implementation is heavily inspired by the implementation of [`non_ascii_idents`] lint: + // https://github.com/rust-lang/rust/blob/master/compiler/rustc_lint/src/non_ascii_idents.rs + + let check_disallowed_script_idents = cx.builder.lint_level(DISALLOWED_SCRIPT_IDENTS).0 != Level::Allow; + if !check_disallowed_script_idents { + return; + } + + let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock(); + // Sort by `Span` so that error messages make sense with respect to the + // order of identifier locations in the code. + let mut symbols: Vec<_> = symbols.iter().collect(); + symbols.sort_unstable_by_key(|k| k.1); + + for (symbol, &span) in &symbols { + // Note: `symbol.as_str()` is an expensive operation, thus should not be called + // more than once for a single symbol. + let symbol_str = symbol.as_str(); + if symbol_str.is_ascii() { + continue; + } + + for c in symbol_str.chars() { + // We want to iterate through all the scripts associated with this character + // and find the first one (if any) that is not in the whitelist. 
+ let forbidden_script = c + .script_extension() + .iter() + .find(|script| !self.whitelist.contains(script)); + if let Some(script) = forbidden_script { + span_lint( + cx, + DISALLOWED_SCRIPT_IDENTS, + span, + &format!( + "identifier `{}` has a Unicode script that is not allowed by configuration: {}", + symbol_str, + script.full_name() + ), + ); + // We don't want to spawn warning multiple times over a single identifier. + break; + } + } + } + } +} diff --git a/clippy_lints/src/lib.rs b/clippy_lints/src/lib.rs index 9cffeeb0224..0eb824dabf3 100644 --- a/clippy_lints/src/lib.rs +++ b/clippy_lints/src/lib.rs @@ -187,6 +187,7 @@ mod default_numeric_fallback; mod dereference; mod derive; mod disallowed_method; +mod disallowed_script_idents; mod disallowed_type; mod doc; mod double_comparison; @@ -585,6 +586,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: derive::EXPL_IMPL_CLONE_ON_COPY, derive::UNSAFE_DERIVE_DESERIALIZE, disallowed_method::DISALLOWED_METHOD, + disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS, disallowed_type::DISALLOWED_TYPE, doc::DOC_MARKDOWN, doc::MISSING_ERRORS_DOC, @@ -995,6 +997,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: LintId::of(create_dir::CREATE_DIR), LintId::of(dbg_macro::DBG_MACRO), LintId::of(default_numeric_fallback::DEFAULT_NUMERIC_FALLBACK), + LintId::of(disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS), LintId::of(else_if_without_else::ELSE_IF_WITHOUT_ELSE), LintId::of(exhaustive_items::EXHAUSTIVE_ENUMS), LintId::of(exhaustive_items::EXHAUSTIVE_STRUCTS), @@ -2082,7 +2085,8 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: store.register_late_pass(move || box disallowed_type::DisallowedType::new(&disallowed_types)); let import_renames = conf.enforced_import_renames.clone(); store.register_late_pass(move || box missing_enforced_import_rename::ImportRename::new(import_renames.clone())); - + let scripts = 
conf.allowed_scripts.clone(); + store.register_early_pass(move || box disallowed_script_idents::DisallowedScriptIdents::new(&scripts)); } #[rustfmt::skip] diff --git a/clippy_lints/src/utils/conf.rs b/clippy_lints/src/utils/conf.rs index 6fc4998318c..44d3d456342 100644 --- a/clippy_lints/src/utils/conf.rs +++ b/clippy_lints/src/utils/conf.rs @@ -212,6 +212,8 @@ define_Conf! { (standard_macro_braces: Vec<crate::nonstandard_macro_braces::MacroMatcher> = Vec::new()), /// Lint: MISSING_ENFORCED_IMPORT_RENAMES. The list of imports to always rename, a fully qualified path followed by the rename. (enforced_import_renames: Vec<crate::utils::conf::Rename> = Vec::new()), + /// Lint: DISALLOWED_SCRIPT_IDENTS. The list of unicode scripts allowed to be used in the scope. + (allowed_scripts: Vec<String> = vec!["Latin".to_string()]), } /// Search for the configuration file. diff --git a/tests/ui-toml/toml_unknown_key/conf_unknown_key.stderr b/tests/ui-toml/toml_unknown_key/conf_unknown_key.stderr index aa7bfa2cc8c..e0029ebeb27 100644 --- a/tests/ui-toml/toml_unknown_key/conf_unknown_key.stderr +++ b/tests/ui-toml/toml_unknown_key/conf_unknown_key.stderr @@ -1,4 +1,4 @@ -error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, 
`upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `third-party` at line 5 column 1 +error: error reading Clippy's configuration file `$DIR/clippy.toml`: unknown field `foobar`, expected one of `avoid-breaking-exported-api`, `msrv`, `blacklisted-names`, `cognitive-complexity-threshold`, `cyclomatic-complexity-threshold`, `doc-valid-idents`, `too-many-arguments-threshold`, `type-complexity-threshold`, `single-char-binding-names-threshold`, `too-large-for-stack`, `enum-variant-name-threshold`, `enum-variant-size-threshold`, `verbose-bit-mask-threshold`, `literal-representation-threshold`, `trivial-copy-size-limit`, `pass-by-value-size-limit`, `too-many-lines-threshold`, `array-size-threshold`, `vec-box-size-threshold`, `max-trait-bounds`, `max-struct-bools`, `max-fn-params-bools`, `warn-on-all-wildcard-imports`, `disallowed-methods`, `disallowed-types`, `unreadable-literal-lint-fractions`, `upper-case-acronyms-aggressive`, `cargo-ignore-publish`, `standard-macro-braces`, `enforced-import-renames`, `allowed-scripts`, `third-party` at line 5 column 1 error: aborting due to previous error diff --git a/tests/ui/disallowed_script_idents.rs b/tests/ui/disallowed_script_idents.rs new file mode 100644 index 00000000000..cfdda35971f --- /dev/null +++ b/tests/ui/disallowed_script_idents.rs @@ -0,0 +1,10 @@ +#![deny(clippy::disallowed_script_idents)] +#![allow(dead_code)] + +fn main() { + let counter = 10; // OK, latin is allowed. + let zähler = 10; // OK, it's still latin. + + let счётчик = 10; // Cyrillic is not allowed by default. + let カウンタ = 10; // Same for japanese. 
+} diff --git a/tests/ui/disallowed_script_idents.stderr b/tests/ui/disallowed_script_idents.stderr new file mode 100644 index 00000000000..cc84dc1d43c --- /dev/null +++ b/tests/ui/disallowed_script_idents.stderr @@ -0,0 +1,20 @@ +error: identifier `счётчик` has a Unicode script that is not allowed by configuration: Cyrillic + --> $DIR/disallowed_script_idents.rs:8:9 + | +LL | let счётчик = 10; // Cyrillic is not allowed by default. + | ^^^^^^^ + | +note: the lint level is defined here + --> $DIR/disallowed_script_idents.rs:1:9 + | +LL | #![deny(clippy::disallowed_script_idents)] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: identifier `カウンタ` has a Unicode script that is not allowed by configuration: Katakana + --> $DIR/disallowed_script_idents.rs:9:9 + | +LL | let カウンタ = 10; // Same for japanese. + | ^^^^^^^^ + +error: aborting due to 2 previous errors + |
