summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
author xizheyin <xizheyin@smail.nju.edu.cn> 2025-07-31 00:44:22 +0800
committer xizheyin <xizheyin@smail.nju.edu.cn> 2025-07-31 13:55:59 +0800
commit 7b667e7811f4a4b496f38d25c5e824f13638cdbb (patch)
tree 9f83495f50b35a78912892b17202811cb032728e /compiler
parent 32e7a4b92b109c24e9822c862a7c74436b50e564 (diff)
download rust-7b667e7811f4a4b496f38d25c5e824f13638cdbb.tar.gz
rust-7b667e7811f4a4b496f38d25c5e824f13638cdbb.zip
Extend `is_case_difference` to handle digit-letter confusables
Signed-off-by: xizheyin <xizheyin@smail.nju.edu.cn>
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/rustc_errors/src/emitter.rs 135
-rw-r--r-- compiler/rustc_errors/src/lib.rs 11
2 files changed, 111 insertions, 35 deletions
diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index 46a4a186824..55a42d0426e 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -262,19 +262,11 @@ pub trait Emitter {
                     format!("help: {msg}")
                 } else {
                     // Show the default suggestion text with the substitution
-                    format!(
-                        "help: {}{}: `{}`",
-                        msg,
-                        if self
-                            .source_map()
-                            .is_some_and(|sm| is_case_difference(sm, snippet, part.span,))
-                        {
-                            " (notice the capitalization)"
-                        } else {
-                            ""
-                        },
-                        snippet,
-                    )
+                    let confusion_type = self
+                        .source_map()
+                        .map(|sm| detect_confusion_type(sm, snippet, part.span))
+                        .unwrap_or(ConfusionType::None);
+                    format!("help: {}{}: `{}`", msg, confusion_type.label_text(), snippet,)
                 };
                 primary_span.push_span_label(part.span, msg);
 
@@ -2028,12 +2020,12 @@ impl HumanEmitter {
         buffer.append(0, ": ", Style::HeaderMsg);
 
         let mut msg = vec![(suggestion.msg.to_owned(), Style::NoStyle)];
-        if suggestions
-            .iter()
-            .take(MAX_SUGGESTIONS)
-            .any(|(_, _, _, only_capitalization)| *only_capitalization)
+        if let Some(confusion_type) =
+            suggestions.iter().take(MAX_SUGGESTIONS).find_map(|(_, _, _, confusion_type)| {
+                if confusion_type.has_confusion() { Some(*confusion_type) } else { None }
+            })
         {
-            msg.push((" (notice the capitalization difference)".into(), Style::NoStyle));
+            msg.push((confusion_type.label_text().into(), Style::NoStyle));
         }
         self.msgs_to_buffer(
             &mut buffer,
@@ -3528,24 +3520,107 @@ pub fn is_different(sm: &SourceMap, suggested: &str, sp: Span) -> bool {
 }
 
 /// Whether the original and suggested code are visually similar enough to warrant extra wording.
-pub fn is_case_difference(sm: &SourceMap, suggested: &str, sp: Span) -> bool {
-    // FIXME: this should probably be extended to also account for `FO0` → `FOO` and unicode.
+pub fn detect_confusion_type(sm: &SourceMap, suggested: &str, sp: Span) -> ConfusionType {
     let found = match sm.span_to_snippet(sp) {
         Ok(snippet) => snippet,
         Err(e) => {
             warn!(error = ?e, "Invalid span {:?}", sp);
-            return false;
+            return ConfusionType::None;
         }
     };
-    let ascii_confusables = &['c', 'f', 'i', 'k', 'o', 's', 'u', 'v', 'w', 'x', 'y', 'z'];
-    // All the chars that differ in capitalization are confusable (above):
-    let confusable = iter::zip(found.chars(), suggested.chars())
-        .filter(|(f, s)| f != s)
-        .all(|(f, s)| ascii_confusables.contains(&f) || ascii_confusables.contains(&s));
-    confusable && found.to_lowercase() == suggested.to_lowercase()
-            // FIXME: We sometimes suggest the same thing we already have, which is a
-            //        bug, but be defensive against that here.
-            && found != suggested
+
+    let mut has_case_confusion = false;
+    let mut has_digit_letter_confusion = false;
+
+    if found.len() == suggested.len() {
+        let mut has_case_diff = false;
+        let mut has_digit_letter_confusable = false;
+        let mut has_other_diff = false;
+
+        let ascii_confusables = &['c', 'f', 'i', 'k', 'o', 's', 'u', 'v', 'w', 'x', 'y', 'z'];
+
+        let digit_letter_confusables = [('0', 'O'), ('1', 'l'), ('5', 'S'), ('8', 'B'), ('9', 'g')];
+
+        for (f, s) in iter::zip(found.chars(), suggested.chars()) {
+            if f != s {
+                if f.to_lowercase().to_string() == s.to_lowercase().to_string() {
+                    // Check for case differences (any character that differs only in case)
+                    if ascii_confusables.contains(&f) || ascii_confusables.contains(&s) {
+                        has_case_diff = true;
+                    } else {
+                        has_other_diff = true;
+                    }
+                } else if digit_letter_confusables.contains(&(f, s))
+                    || digit_letter_confusables.contains(&(s, f))
+                {
+                    // Check for digit-letter confusables (like 0 vs O, 1 vs l, etc.)
+                    has_digit_letter_confusable = true;
+                } else {
+                    has_other_diff = true;
+                }
+            }
+        }
+
+        // If we have case differences and no other differences
+        if has_case_diff && !has_other_diff && found != suggested {
+            has_case_confusion = true;
+        }
+        if has_digit_letter_confusable && !has_other_diff && found != suggested {
+            has_digit_letter_confusion = true;
+        }
+    }
+
+    match (has_case_confusion, has_digit_letter_confusion) {
+        (true, true) => ConfusionType::Both,
+        (true, false) => ConfusionType::Case,
+        (false, true) => ConfusionType::DigitLetter,
+        (false, false) => ConfusionType::None,
+    }
+}
+
+/// Represents the type of confusion detected between original and suggested code.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ConfusionType {
+    /// No confusion detected
+    None,
+    /// Only case differences (e.g., "hello" vs "Hello")
+    Case,
+    /// Only digit-letter confusion (e.g., "0" vs "O", "1" vs "l")
+    DigitLetter,
+    /// Both case and digit-letter confusion
+    Both,
+}
+
+impl ConfusionType {
+    /// Returns the appropriate label text for this confusion type.
+    pub fn label_text(&self) -> &'static str {
+        match self {
+            ConfusionType::None => "",
+            ConfusionType::Case => " (notice the capitalization)",
+            ConfusionType::DigitLetter => " (notice the digit/letter confusion)",
+            ConfusionType::Both => " (notice the capitalization and digit/letter confusion)",
+        }
+    }
+
+    /// Combines two confusion types. If either is `Both`, the result is `Both`.
+    /// If one is `Case` and the other is `DigitLetter`, the result is `Both`.
+    /// Otherwise, returns the non-`None` type, or `None` if both are `None`.
+    pub fn combine(self, other: ConfusionType) -> ConfusionType {
+        match (self, other) {
+            (ConfusionType::None, other) => other,
+            (this, ConfusionType::None) => this,
+            (ConfusionType::Both, _) | (_, ConfusionType::Both) => ConfusionType::Both,
+            (ConfusionType::Case, ConfusionType::DigitLetter)
+            | (ConfusionType::DigitLetter, ConfusionType::Case) => ConfusionType::Both,
+            (ConfusionType::Case, ConfusionType::Case) => ConfusionType::Case,
+            (ConfusionType::DigitLetter, ConfusionType::DigitLetter) => ConfusionType::DigitLetter,
+        }
+    }
+
+    /// Returns true if this confusion type represents any kind of confusion.
+    pub fn has_confusion(&self) -> bool {
+        *self != ConfusionType::None
+    }
 }
 
 pub(crate) fn should_show_source_code(
diff --git a/compiler/rustc_errors/src/lib.rs b/compiler/rustc_errors/src/lib.rs
index 381d780077d..2534cddf105 100644
--- a/compiler/rustc_errors/src/lib.rs
+++ b/compiler/rustc_errors/src/lib.rs
@@ -50,7 +50,7 @@ pub use diagnostic_impls::{
     IndicateAnonymousLifetime, SingleLabelManySpans,
 };
 pub use emitter::ColorConfig;
-use emitter::{DynEmitter, Emitter, is_case_difference, is_different};
+use emitter::{ConfusionType, DynEmitter, Emitter, detect_confusion_type, is_different};
 use rustc_data_structures::AtomicRef;
 use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet};
 use rustc_data_structures::stable_hasher::StableHasher;
@@ -308,7 +308,7 @@ impl CodeSuggestion {
     pub(crate) fn splice_lines(
         &self,
         sm: &SourceMap,
-    ) -> Vec<(String, Vec<SubstitutionPart>, Vec<Vec<SubstitutionHighlight>>, bool)> {
+    ) -> Vec<(String, Vec<SubstitutionPart>, Vec<Vec<SubstitutionHighlight>>, ConfusionType)> {
         // For the `Vec<Vec<SubstitutionHighlight>>` value, the first level of the vector
         // corresponds to the output snippet's lines, while the second level corresponds to the
         // substrings within that line that should be highlighted.
@@ -414,14 +414,15 @@ impl CodeSuggestion {
                 // We need to keep track of the difference between the existing code and the added
                 // or deleted code in order to point at the correct column *after* substitution.
                 let mut acc = 0;
-                let mut only_capitalization = false;
+                let mut confusion_type = ConfusionType::None;
                 for part in &mut substitution.parts {
                     // If this is a replacement of, e.g. `"a"` into `"ab"`, adjust the
                     // suggestion and snippet to look as if we just suggested to add
                     // `"b"`, which is typically much easier for the user to understand.
                     part.trim_trivial_replacements(sm);
 
-                    only_capitalization |= is_case_difference(sm, &part.snippet, part.span);
+                    let part_confusion = detect_confusion_type(sm, &part.snippet, part.span);
+                    confusion_type = confusion_type.combine(part_confusion);
                     let cur_lo = sm.lookup_char_pos(part.span.lo());
                     if prev_hi.line == cur_lo.line {
                         let mut count =
@@ -511,7 +512,7 @@ impl CodeSuggestion {
                 if highlights.iter().all(|parts| parts.is_empty()) {
                     None
                 } else {
-                    Some((buf, substitution.parts, highlights, only_capitalization))
+                    Some((buf, substitution.parts, highlights, confusion_type))
                 }
             })
             .collect()