about summary refs log tree commit diff
diff options
context:
space:
mode:
authorManish Goregaokar <manishsmail@gmail.com>2020-06-25 18:00:05 -0700
committerGitHub <noreply@github.com>2020-06-25 18:00:05 -0700
commit23c9ac6b730f4e71b47f8714420f2609537b7114 (patch)
treec4c81002c930a5b40afff506f046cb5251a83e94
parent9275ff7b218d6aacea649425bf1b603c6f78d578 (diff)
parent25e864e19812e079a9e69134fd1bbbfee444c0a2 (diff)
downloadrust-23c9ac6b730f4e71b47f8714420f2609537b7114.tar.gz
rust-23c9ac6b730f4e71b47f8714420f2609537b7114.zip
Rollup merge of #72770 - crlf0710:mixed_script_confusable, r=Manishearth
Implement mixed script confusable lint.

This implements the mixed script confusable lint defined in RFC 2457.
This is blocked on #72069 and https://github.com/unicode-rs/unicode-security/pull/13, and will need a Cargo.toml version bump after those are resolved.

The lint's warning message is sub-optimal for now. We'll need a mechanism to properly print an `AugmentedScriptSet` to the screen; this is to be added to the `unicode-security` crate.

r? @Manishearth
-rw-r--r--Cargo.lock8
-rw-r--r--src/librustc_lint/Cargo.toml2
-rw-r--r--src/librustc_lint/non_ascii_idents.rs354
-rw-r--r--src/librustc_session/parse.rs3
-rw-r--r--src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs10
-rw-r--r--src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr8
-rw-r--r--src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables-2.rs20
-rw-r--r--src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables.rs15
-rw-r--r--src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables.stderr34
-rw-r--r--src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.rs4
-rw-r--r--src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.stderr8
-rw-r--r--src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs4
-rw-r--r--src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr8
-rw-r--r--src/test/ui/parser/issue-62524.rs2
-rw-r--r--src/test/ui/parser/issue-62524.stderr6
-rw-r--r--src/test/ui/utf8_idents.rs2
-rw-r--r--src/test/ui/utf8_idents.stderr10
17 files changed, 333 insertions, 165 deletions
diff --git a/Cargo.lock b/Cargo.lock
index b54566e7176..8a0991059d5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5405,15 +5405,15 @@ dependencies = [
 
 [[package]]
 name = "unicode-script"
-version = "0.4.0"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b2c5c29e805da6817f5af6a627d65adb045cebf05cccd5a3493d6109454391c"
+checksum = "58b33414ea8db4b7ea0343548dbdc31d27aef06beacf7044a87e564d9b0feb7d"
 
 [[package]]
 name = "unicode-security"
-version = "0.0.3"
+version = "0.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5f9011bbed9c13372bc8df618b55a38138445199caf3b61d432c6859c36dee0"
+checksum = "5d87c28edc5b263377e448d6cdcb935c06b95413d8013ba6fae470558ccab18f"
 dependencies = [
  "unicode-normalization",
  "unicode-script",
diff --git a/src/librustc_lint/Cargo.toml b/src/librustc_lint/Cargo.toml
index ada6f2a9381..58c15257326 100644
--- a/src/librustc_lint/Cargo.toml
+++ b/src/librustc_lint/Cargo.toml
@@ -10,7 +10,7 @@ path = "lib.rs"
 
 [dependencies]
 log = "0.4"
-unicode-security = "0.0.3"
+unicode-security = "0.0.5"
 rustc_middle = { path = "../librustc_middle" }
 rustc_ast_pretty = { path = "../librustc_ast_pretty" }
 rustc_attr = { path = "../librustc_attr" }
diff --git a/src/librustc_lint/non_ascii_idents.rs b/src/librustc_lint/non_ascii_idents.rs
index 064b0255397..30dbd069c29 100644
--- a/src/librustc_lint/non_ascii_idents.rs
+++ b/src/librustc_lint/non_ascii_idents.rs
@@ -1,9 +1,7 @@
 use crate::{EarlyContext, EarlyLintPass, LintContext};
 use rustc_ast::ast;
 use rustc_data_structures::fx::FxHashMap;
-use rustc_span::symbol::{Ident, SymbolStr};
-use std::hash::{Hash, Hasher};
-use std::ops::Deref;
+use rustc_span::symbol::SymbolStr;
 
 declare_lint! {
     pub NON_ASCII_IDENTS,
@@ -19,158 +17,256 @@ declare_lint! {
     crate_level_only
 }
 
-// FIXME: Change this to warn.
 declare_lint! {
     pub CONFUSABLE_IDENTS,
-    Allow,
+    Warn,
     "detects visually confusable pairs between identifiers",
     crate_level_only
 }
 
-declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
-
-enum CowBoxSymStr {
-    Interned(SymbolStr),
-    Owned(Box<str>),
-}
-
-impl Deref for CowBoxSymStr {
-    type Target = str;
-
-    fn deref(&self) -> &str {
-        match self {
-            CowBoxSymStr::Interned(interned) => interned,
-            CowBoxSymStr::Owned(ref owned) => owned,
-        }
-    }
-}
-
-impl Hash for CowBoxSymStr {
-    #[inline]
-    fn hash<H: Hasher>(&self, state: &mut H) {
-        Hash::hash(&**self, state)
-    }
-}
-
-impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
-    #[inline]
-    fn eq(&self, other: &CowBoxSymStr) -> bool {
-        PartialEq::eq(&**self, &**other)
-    }
-}
-
-impl Eq for CowBoxSymStr {}
-
-fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr {
-    use std::mem::swap;
-    use unicode_security::confusable_detection::skeleton;
-    buffer.clear();
-    buffer.extend(skeleton(&symbol_str));
-    if symbol_str == *buffer {
-        CowBoxSymStr::Interned(symbol_str)
-    } else {
-        let mut owned = String::new();
-        swap(buffer, &mut owned);
-        CowBoxSymStr::Owned(owned.into_boxed_str())
-    }
-}
-
-fn is_in_ascii_confusable_closure(c: char) -> bool {
-    // FIXME: move this table to `unicode_security` crate.
-    // data here corresponds to Unicode 13.
-    const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)];
-    let c = c as u64;
-    for &(range_start, range_end) in ASCII_CONFUSABLE_CLOSURE {
-        if c >= range_start && c <= range_end {
-            return true;
-        }
-    }
-    false
+declare_lint! {
+    pub MIXED_SCRIPT_CONFUSABLES,
+    Warn,
+    "detects Unicode scripts whose mixed script confusables codepoints are solely used",
+    crate_level_only
 }
 
-fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool {
-    // FIXME: move this table to `unicode_security` crate.
-    // data here corresponds to Unicode 13.
-    const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[
-        0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba,
-        0x2080,
-    ];
-    let c = c as u64;
-    for &item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST {
-        if c == item {
-            return true;
-        }
-    }
-    false
-}
+declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS, MIXED_SCRIPT_CONFUSABLES]);
 
 impl EarlyLintPass for NonAsciiIdents {
     fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
         use rustc_session::lint::Level;
-        if cx.builder.lint_level(CONFUSABLE_IDENTS).0 == Level::Allow {
+        use rustc_span::Span;
+        use std::collections::BTreeMap;
+        use unicode_security::GeneralSecurityProfile;
+        use utils::CowBoxSymStr;
+
+        let check_non_ascii_idents = cx.builder.lint_level(NON_ASCII_IDENTS).0 != Level::Allow;
+        let check_uncommon_codepoints =
+            cx.builder.lint_level(UNCOMMON_CODEPOINTS).0 != Level::Allow;
+        let check_confusable_idents = cx.builder.lint_level(CONFUSABLE_IDENTS).0 != Level::Allow;
+        let check_mixed_script_confusables =
+            cx.builder.lint_level(MIXED_SCRIPT_CONFUSABLES).0 != Level::Allow;
+
+        if !check_non_ascii_idents
+            && !check_uncommon_codepoints
+            && !check_confusable_idents
+            && !check_mixed_script_confusables
+        {
             return;
         }
+
+        let mut has_non_ascii_idents = false;
         let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
-        let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len());
-        let mut in_fast_path = true;
-        for (symbol, sp) in symbols.iter() {
-            // fast path
+        for (symbol, &sp) in symbols.iter() {
             let symbol_str = symbol.as_str();
-            if !symbol_str.chars().all(is_in_ascii_confusable_closure) {
-                // fallback to slow path.
-                symbol_strs_and_spans.clear();
-                in_fast_path = false;
-                break;
+            if symbol_str.is_ascii() {
+                continue;
             }
-            if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) {
-                symbol_strs_and_spans.push((symbol_str, *sp));
+            has_non_ascii_idents = true;
+            cx.struct_span_lint(NON_ASCII_IDENTS, sp, |lint| {
+                lint.build("identifier contains non-ASCII characters").emit()
+            });
+            if check_uncommon_codepoints
+                && !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
+            {
+                cx.struct_span_lint(UNCOMMON_CODEPOINTS, sp, |lint| {
+                    lint.build("identifier contains uncommon Unicode codepoints").emit()
+                })
             }
         }
-        if !in_fast_path {
-            // slow path
-            for (symbol, sp) in symbols.iter() {
+
+        if has_non_ascii_idents && check_confusable_idents {
+            let mut skeleton_map: FxHashMap<CowBoxSymStr, (SymbolStr, Span, bool)> =
+                FxHashMap::with_capacity_and_hasher(symbols.len(), Default::default());
+            let mut str_buf = String::new();
+            for (symbol, &sp) in symbols.iter() {
+                fn calc_skeleton(symbol_str: &SymbolStr, buffer: &mut String) -> CowBoxSymStr {
+                    use std::mem::replace;
+                    use unicode_security::confusable_detection::skeleton;
+                    buffer.clear();
+                    buffer.extend(skeleton(symbol_str));
+                    if *symbol_str == *buffer {
+                        CowBoxSymStr::Interned(symbol_str.clone())
+                    } else {
+                        let owned = replace(buffer, String::new());
+                        CowBoxSymStr::Owned(owned.into_boxed_str())
+                    }
+                }
                 let symbol_str = symbol.as_str();
-                symbol_strs_and_spans.push((symbol_str, *sp));
+                let is_ascii = symbol_str.is_ascii();
+                let skeleton = calc_skeleton(&symbol_str, &mut str_buf);
+                skeleton_map
+                    .entry(skeleton)
+                    .and_modify(|(existing_symbolstr, existing_span, existing_is_ascii)| {
+                        if !*existing_is_ascii || !is_ascii {
+                            cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
+                                lint.build(&format!(
+                                    "identifier pair considered confusable between `{}` and `{}`",
+                                    existing_symbolstr, symbol_str
+                                ))
+                                .span_label(
+                                    *existing_span,
+                                    "this is where the previous identifier occurred",
+                                )
+                                .emit();
+                            });
+                        }
+                        if *existing_is_ascii && !is_ascii {
+                            *existing_symbolstr = symbol_str.clone();
+                            *existing_span = sp;
+                            *existing_is_ascii = is_ascii;
+                        }
+                    })
+                    .or_insert((symbol_str, sp, is_ascii));
             }
         }
-        drop(symbols);
-        symbol_strs_and_spans.sort_by_key(|x| x.0.clone());
-        let mut skeleton_map =
-            FxHashMap::with_capacity_and_hasher(symbol_strs_and_spans.len(), Default::default());
-        let mut str_buf = String::new();
-        for (symbol_str, sp) in symbol_strs_and_spans {
-            let skeleton = calc_skeleton(symbol_str.clone(), &mut str_buf);
-            skeleton_map
-                .entry(skeleton)
-                .and_modify(|(existing_symbolstr, existing_span)| {
-                    cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
-                        lint.build(&format!(
-                            "identifier pair considered confusable between `{}` and `{}`",
-                            existing_symbolstr, symbol_str
-                        ))
-                        .span_label(
-                            *existing_span,
-                            "this is where the previous identifier occurred",
-                        )
-                        .emit();
+
+        if has_non_ascii_idents && check_mixed_script_confusables {
+            use unicode_security::is_potential_mixed_script_confusable_char;
+            use unicode_security::mixed_script::AugmentedScriptSet;
+
+            #[derive(Clone)]
+            enum ScriptSetUsage {
+                Suspicious(Vec<char>, Span),
+                Verified,
+            }
+
+            let mut script_states: FxHashMap<AugmentedScriptSet, ScriptSetUsage> =
+                FxHashMap::default();
+            let latin_augmented_script_set = AugmentedScriptSet::for_char('A');
+            script_states.insert(latin_augmented_script_set, ScriptSetUsage::Verified);
+
+            let mut has_suspicous = false;
+            for (symbol, &sp) in symbols.iter() {
+                let symbol_str = symbol.as_str();
+                for ch in symbol_str.chars() {
+                    if ch.is_ascii() {
+                        // all ascii characters are covered by exception.
+                        continue;
+                    }
+                    if !GeneralSecurityProfile::identifier_allowed(ch) {
+                        // this character is covered by `uncommon_codepoints` lint.
+                        continue;
+                    }
+                    let augmented_script_set = AugmentedScriptSet::for_char(ch);
+                    script_states
+                        .entry(augmented_script_set)
+                        .and_modify(|existing_state| {
+                            if let ScriptSetUsage::Suspicious(ch_list, _) = existing_state {
+                                if is_potential_mixed_script_confusable_char(ch) {
+                                    ch_list.push(ch);
+                                } else {
+                                    *existing_state = ScriptSetUsage::Verified;
+                                }
+                            }
+                        })
+                        .or_insert_with(|| {
+                            if !is_potential_mixed_script_confusable_char(ch) {
+                                ScriptSetUsage::Verified
+                            } else {
+                                has_suspicous = true;
+                                ScriptSetUsage::Suspicious(vec![ch], sp)
+                            }
+                        });
+                }
+            }
+
+            if has_suspicous {
+                let verified_augmented_script_sets = script_states
+                    .iter()
+                    .flat_map(|(k, v)| match v {
+                        ScriptSetUsage::Verified => Some(*k),
+                        _ => None,
+                    })
+                    .collect::<Vec<_>>();
+
+                // we're sorting the output here.
+                let mut lint_reports: BTreeMap<(Span, Vec<char>), AugmentedScriptSet> =
+                    BTreeMap::new();
+
+                'outerloop: for (augment_script_set, usage) in script_states {
+                    let (mut ch_list, sp) = match usage {
+                        ScriptSetUsage::Verified => continue,
+                        ScriptSetUsage::Suspicious(ch_list, sp) => (ch_list, sp),
+                    };
+
+                    if augment_script_set.is_all() {
+                        continue;
+                    }
+
+                    for existing in verified_augmented_script_sets.iter() {
+                        if existing.is_all() {
+                            continue;
+                        }
+                        let mut intersect = *existing;
+                        intersect.intersect_with(augment_script_set);
+                        if !intersect.is_empty() && !intersect.is_all() {
+                            continue 'outerloop;
+                        }
+                    }
+
+                    ch_list.sort();
+                    ch_list.dedup();
+                    lint_reports.insert((sp, ch_list), augment_script_set);
+                }
+
+                for ((sp, ch_list), script_set) in lint_reports {
+                    cx.struct_span_lint(MIXED_SCRIPT_CONFUSABLES, sp, |lint| {
+                        let message = format!(
+                            "The usage of Script Group `{}` in this crate consists solely of mixed script confusables",
+                            script_set);
+                        let mut note = "The usage includes ".to_string();
+                        for (idx, ch) in ch_list.into_iter().enumerate() {
+                            if idx != 0 {
+                                note += ", ";
+                            }
+                            let char_info = format!("'{}' (U+{:04X})", ch, ch as u32);
+                            note += &char_info;
+                        }
+                        note += ".";
+                        lint.build(&message).note(&note).note("Please recheck to make sure their usages are indeed what you want.").emit()
                     });
-                })
-                .or_insert((symbol_str, sp));
+                }
+            }
         }
     }
-    fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: Ident) {
-        use unicode_security::GeneralSecurityProfile;
-        let name_str = ident.name.as_str();
-        if name_str.is_ascii() {
-            return;
+}
+
+mod utils {
+    use rustc_span::symbol::SymbolStr;
+    use std::hash::{Hash, Hasher};
+    use std::ops::Deref;
+
+    pub(super) enum CowBoxSymStr {
+        Interned(SymbolStr),
+        Owned(Box<str>),
+    }
+
+    impl Deref for CowBoxSymStr {
+        type Target = str;
+
+        fn deref(&self) -> &str {
+            match self {
+                CowBoxSymStr::Interned(interned) => interned,
+                CowBoxSymStr::Owned(ref owned) => owned,
+            }
+        }
+    }
+
+    impl Hash for CowBoxSymStr {
+        #[inline]
+        fn hash<H: Hasher>(&self, state: &mut H) {
+            Hash::hash(&**self, state)
         }
-        cx.struct_span_lint(NON_ASCII_IDENTS, ident.span, |lint| {
-            lint.build("identifier contains non-ASCII characters").emit()
-        });
-        if !name_str.chars().all(GeneralSecurityProfile::identifier_allowed) {
-            cx.struct_span_lint(UNCOMMON_CODEPOINTS, ident.span, |lint| {
-                lint.build("identifier contains uncommon Unicode codepoints").emit()
-            })
+    }
+
+    impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
+        #[inline]
+        fn eq(&self, other: &CowBoxSymStr) -> bool {
+            PartialEq::eq(&**self, &**other)
         }
     }
+
+    impl Eq for CowBoxSymStr {}
 }
diff --git a/src/librustc_session/parse.rs b/src/librustc_session/parse.rs
index ddbc95fb1b0..f4e5da4d54f 100644
--- a/src/librustc_session/parse.rs
+++ b/src/librustc_session/parse.rs
@@ -13,6 +13,7 @@ use rustc_span::hygiene::ExpnId;
 use rustc_span::source_map::{FilePathMapping, SourceMap};
 use rustc_span::{MultiSpan, Span, Symbol};
 
+use std::collections::BTreeMap;
 use std::path::PathBuf;
 use std::str;
 
@@ -63,7 +64,7 @@ impl GatedSpans {
 #[derive(Default)]
 pub struct SymbolGallery {
     /// All symbols occurred and their first occurrance span.
-    pub symbols: Lock<FxHashMap<Symbol, Span>>,
+    pub symbols: Lock<BTreeMap<Symbol, Span>>,
 }
 
 impl SymbolGallery {
diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs
index 12093837d26..e15ed2e70b8 100644
--- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs
+++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.rs
@@ -2,8 +2,14 @@
 #![deny(confusable_idents)]
 #![allow(uncommon_codepoints, non_upper_case_globals)]
 
-const s: usize = 42; //~ ERROR identifier pair considered confusable
+const s: usize = 42;
 
 fn main() {
-    let s = "rust";
+    let s = "rust"; //~ ERROR identifier pair considered confusable
+    not_affected();
+}
+
+fn not_affected() {
+    let s1 = 1;
+    let sl = 'l';
 }
diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr
index 40ee18acb3c..218f94f7b58 100644
--- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr
+++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-confusable-idents.stderr
@@ -1,11 +1,11 @@
-error: identifier pair considered confusable between `s` and `s`
-  --> $DIR/lint-confusable-idents.rs:5:7
+error: identifier pair considered confusable between `s` and `s`
+  --> $DIR/lint-confusable-idents.rs:8:9
    |
 LL | const s: usize = 42;
-   |       ^^
+   |       -- this is where the previous identifier occurred
 ...
 LL |     let s = "rust";
-   |         - this is where the previous identifier occurred
+   |         ^
    |
 note: the lint level is defined here
   --> $DIR/lint-confusable-idents.rs:2:9
diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables-2.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables-2.rs
new file mode 100644
index 00000000000..a5b45466da5
--- /dev/null
+++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables-2.rs
@@ -0,0 +1,20 @@
+// check-pass
+#![feature(non_ascii_idents)]
+#![deny(mixed_script_confusables)]
+
+struct ΑctuallyNotLatin;
+
+fn main() {
+    let λ = 42; // this usage of Greek confirms that Greek is used intentionally.
+}
+
+mod роре {
+    const エ: &'static str = "アイウ";
+
+    // this usage of Katakana confirms that Katakana is used intentionally.
+    fn ニャン() {
+        let д: usize = 100; // this usage of Cyrillic confirms that Cyrillic is used intentionally.
+
+        println!("meow!");
+    }
+}
diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables.rs
new file mode 100644
index 00000000000..4637b03f250
--- /dev/null
+++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables.rs
@@ -0,0 +1,15 @@
+#![feature(non_ascii_idents)]
+#![deny(mixed_script_confusables)]
+
+struct ΑctuallyNotLatin;
+//~^ ERROR The usage of Script Group `Greek` in this crate consists solely of
+
+fn main() {
+    let v = ΑctuallyNotLatin;
+}
+
+mod роре {
+//~^ ERROR The usage of Script Group `Cyrillic` in this crate consists solely of
+    const エ: &'static str = "アイウ";
+    //~^ ERROR The usage of Script Group `Japanese, Katakana` in this crate consists solely of
+}
diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables.stderr b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables.stderr
new file mode 100644
index 00000000000..6f75a1ece37
--- /dev/null
+++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-mixed-script-confusables.stderr
@@ -0,0 +1,34 @@
+error: The usage of Script Group `Greek` in this crate consists solely of mixed script confusables
+  --> $DIR/lint-mixed-script-confusables.rs:4:8
+   |
+LL | struct ΑctuallyNotLatin;
+   |        ^^^^^^^^^^^^^^^^
+   |
+note: the lint level is defined here
+  --> $DIR/lint-mixed-script-confusables.rs:2:9
+   |
+LL | #![deny(mixed_script_confusables)]
+   |         ^^^^^^^^^^^^^^^^^^^^^^^^
+   = note: The usage includes 'Α' (U+0391).
+   = note: Please recheck to make sure their usages are indeed what you want.
+
+error: The usage of Script Group `Cyrillic` in this crate consists solely of mixed script confusables
+  --> $DIR/lint-mixed-script-confusables.rs:11:5
+   |
+LL | mod роре {
+   |     ^^^^
+   |
+   = note: The usage includes 'е' (U+0435), 'о' (U+043E), 'р' (U+0440).
+   = note: Please recheck to make sure their usages are indeed what you want.
+
+error: The usage of Script Group `Japanese, Katakana` in this crate consists solely of mixed script confusables
+  --> $DIR/lint-mixed-script-confusables.rs:13:11
+   |
+LL |     const エ: &'static str = "アイウ";
+   |           ^^
+   |
+   = note: The usage includes 'エ' (U+30A8).
+   = note: Please recheck to make sure their usages are indeed what you want.
+
+error: aborting due to 3 previous errors
+
diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.rs
index 057329a0a65..20d00cf701a 100644
--- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.rs
+++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.rs
@@ -7,5 +7,7 @@ fn coöperation() {} //~ ERROR identifier contains non-ASCII characters
 
 fn main() {
     let naïveté = 2; //~ ERROR identifier contains non-ASCII characters
-    println!("{}", naïveté); //~ ERROR identifier contains non-ASCII characters
+
+    // using the same identifier the second time won't trigger the lint.
+    println!("{}", naïveté);
 }
diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.stderr b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.stderr
index 6c9f0866c01..048b6ff5d68 100644
--- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.stderr
+++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-non-ascii-idents.stderr
@@ -22,11 +22,5 @@ error: identifier contains non-ASCII characters
 LL |     let naïveté = 2;
    |         ^^^^^^^
 
-error: identifier contains non-ASCII characters
-  --> $DIR/lint-non-ascii-idents.rs:10:20
-   |
-LL |     println!("{}", naïveté);
-   |                    ^^^^^^^
-
-error: aborting due to 4 previous errors
+error: aborting due to 3 previous errors
 
diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs
index 7ac0d035d5b..b5e251e047b 100644
--- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs
+++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.rs
@@ -7,5 +7,7 @@ fn dijkstra() {} //~ ERROR identifier contains uncommon Unicode codepoints
 
 fn main() {
     let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints
-    println!("{}", ㇻㇲㇳ); //~ ERROR identifier contains uncommon Unicode codepoints
+
+    // using the same identifier the second time won't trigger the lint.
+    println!("{}", ㇻㇲㇳ);
 }
diff --git a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr
index b270bd1f051..05ea3d5de7d 100644
--- a/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr
+++ b/src/test/ui/lint/rfc-2457-non-ascii-idents/lint-uncommon-codepoints.stderr
@@ -22,11 +22,5 @@ error: identifier contains uncommon Unicode codepoints
 LL |     let ㇻㇲㇳ = "rust";
    |         ^^^^^^
 
-error: identifier contains uncommon Unicode codepoints
-  --> $DIR/lint-uncommon-codepoints.rs:10:20
-   |
-LL |     println!("{}", ㇻㇲㇳ);
-   |                    ^^^^^^
-
-error: aborting due to 4 previous errors
+error: aborting due to 3 previous errors
 
diff --git a/src/test/ui/parser/issue-62524.rs b/src/test/ui/parser/issue-62524.rs
index 57de4b87b0f..5259dfe2e65 100644
--- a/src/test/ui/parser/issue-62524.rs
+++ b/src/test/ui/parser/issue-62524.rs
@@ -1,4 +1,6 @@
 // ignore-tidy-trailing-newlines
 // error-pattern: aborting due to 3 previous errors
+#![allow(uncommon_codepoints)]
+
 y![
 Ϥ,
\ No newline at end of file
diff --git a/src/test/ui/parser/issue-62524.stderr b/src/test/ui/parser/issue-62524.stderr
index 8191c9682ce..d5e07622b11 100644
--- a/src/test/ui/parser/issue-62524.stderr
+++ b/src/test/ui/parser/issue-62524.stderr
@@ -1,5 +1,5 @@
 error: this file contains an unclosed delimiter
-  --> $DIR/issue-62524.rs:4:3
+  --> $DIR/issue-62524.rs:6:3
    |
 LL | y![
    |   - unclosed delimiter
@@ -7,7 +7,7 @@ LL | Ϥ,
    |   ^
 
 error: macros that expand to items must be delimited with braces or followed by a semicolon
-  --> $DIR/issue-62524.rs:3:3
+  --> $DIR/issue-62524.rs:5:3
    |
 LL |   y![
    |  ___^
@@ -24,7 +24,7 @@ LL | Ϥ,;
    |   ^
 
 error: cannot find macro `y` in this scope
-  --> $DIR/issue-62524.rs:3:1
+  --> $DIR/issue-62524.rs:5:1
    |
 LL | y![
    | ^
diff --git a/src/test/ui/utf8_idents.rs b/src/test/ui/utf8_idents.rs
index f59d5502aae..6c54086cc20 100644
--- a/src/test/ui/utf8_idents.rs
+++ b/src/test/ui/utf8_idents.rs
@@ -1,3 +1,5 @@
+#![allow(mixed_script_confusables)]
+
 fn foo<
     'β, //~ ERROR non-ascii idents are not fully supported
     γ  //~ ERROR non-ascii idents are not fully supported
diff --git a/src/test/ui/utf8_idents.stderr b/src/test/ui/utf8_idents.stderr
index 877412df8fa..2fc0b1c39ef 100644
--- a/src/test/ui/utf8_idents.stderr
+++ b/src/test/ui/utf8_idents.stderr
@@ -1,5 +1,5 @@
 error[E0658]: non-ascii idents are not fully supported
-  --> $DIR/utf8_idents.rs:2:5
+  --> $DIR/utf8_idents.rs:4:5
    |
 LL |     'β,
    |     ^^
@@ -8,7 +8,7 @@ LL |     'β,
    = help: add `#![feature(non_ascii_idents)]` to the crate attributes to enable
 
 error[E0658]: non-ascii idents are not fully supported
-  --> $DIR/utf8_idents.rs:3:5
+  --> $DIR/utf8_idents.rs:5:5
    |
 LL |     γ
    |     ^
@@ -17,7 +17,7 @@ LL |     γ
    = help: add `#![feature(non_ascii_idents)]` to the crate attributes to enable
 
 error[E0658]: non-ascii idents are not fully supported
-  --> $DIR/utf8_idents.rs:8:5
+  --> $DIR/utf8_idents.rs:10:5
    |
 LL |     δ: usize
    |     ^
@@ -26,7 +26,7 @@ LL |     δ: usize
    = help: add `#![feature(non_ascii_idents)]` to the crate attributes to enable
 
 error[E0658]: non-ascii idents are not fully supported
-  --> $DIR/utf8_idents.rs:12:9
+  --> $DIR/utf8_idents.rs:14:9
    |
 LL |     let α = 0.00001f64;
    |         ^
@@ -35,7 +35,7 @@ LL |     let α = 0.00001f64;
    = help: add `#![feature(non_ascii_idents)]` to the crate attributes to enable
 
 warning: type parameter `γ` should have an upper camel case name
-  --> $DIR/utf8_idents.rs:3:5
+  --> $DIR/utf8_idents.rs:5:5
    |
 LL |     γ
    |     ^ help: convert the identifier to upper camel case: `Γ`