diff options
| author | xizheyin <xizheyin@smail.nju.edu.cn> | 2025-07-31 00:44:22 +0800 |
|---|---|---|
| committer | xizheyin <xizheyin@smail.nju.edu.cn> | 2025-07-31 13:55:59 +0800 |
| commit | 7b667e7811f4a4b496f38d25c5e824f13638cdbb (patch) | |
| tree | 9f83495f50b35a78912892b17202811cb032728e /compiler | |
| parent | 32e7a4b92b109c24e9822c862a7c74436b50e564 (diff) | |
| download | rust-7b667e7811f4a4b496f38d25c5e824f13638cdbb.tar.gz rust-7b667e7811f4a4b496f38d25c5e824f13638cdbb.zip | |
Extend `is_case_difference` to handle digit-letter confusables
Signed-off-by: xizheyin <xizheyin@smail.nju.edu.cn>
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/rustc_errors/src/emitter.rs | 135 | ||||
| -rw-r--r-- | compiler/rustc_errors/src/lib.rs | 11 |
2 files changed, 111 insertions, 35 deletions
diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index 46a4a186824..55a42d0426e 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -262,19 +262,11 @@ pub trait Emitter { format!("help: {msg}") } else { // Show the default suggestion text with the substitution - format!( - "help: {}{}: `{}`", - msg, - if self - .source_map() - .is_some_and(|sm| is_case_difference(sm, snippet, part.span,)) - { - " (notice the capitalization)" - } else { - "" - }, - snippet, - ) + let confusion_type = self + .source_map() + .map(|sm| detect_confusion_type(sm, snippet, part.span)) + .unwrap_or(ConfusionType::None); + format!("help: {}{}: `{}`", msg, confusion_type.label_text(), snippet,) }; primary_span.push_span_label(part.span, msg); @@ -2028,12 +2020,12 @@ impl HumanEmitter { buffer.append(0, ": ", Style::HeaderMsg); let mut msg = vec![(suggestion.msg.to_owned(), Style::NoStyle)]; - if suggestions - .iter() - .take(MAX_SUGGESTIONS) - .any(|(_, _, _, only_capitalization)| *only_capitalization) + if let Some(confusion_type) = + suggestions.iter().take(MAX_SUGGESTIONS).find_map(|(_, _, _, confusion_type)| { + if confusion_type.has_confusion() { Some(*confusion_type) } else { None } + }) { - msg.push((" (notice the capitalization difference)".into(), Style::NoStyle)); + msg.push((confusion_type.label_text().into(), Style::NoStyle)); } self.msgs_to_buffer( &mut buffer, @@ -3528,24 +3520,107 @@ pub fn is_different(sm: &SourceMap, suggested: &str, sp: Span) -> bool { } /// Whether the original and suggested code are visually similar enough to warrant extra wording. -pub fn is_case_difference(sm: &SourceMap, suggested: &str, sp: Span) -> bool { - // FIXME: this should probably be extended to also account for `FO0` → `FOO` and unicode. +pub fn detect_confusion_type(sm: &SourceMap, suggested: &str, sp: Span) -> ConfusionType { let found = match sm.span_to_snippet(sp) { Ok(snippet) => snippet, Err(e) => { warn!(error = ?e, "Invalid span {:?}", sp); - return false; + return ConfusionType::None; } }; - let ascii_confusables = &['c', 'f', 'i', 'k', 'o', 's', 'u', 'v', 'w', 'x', 'y', 'z']; - // All the chars that differ in capitalization are confusable (above): - let confusable = iter::zip(found.chars(), suggested.chars()) - .filter(|(f, s)| f != s) - .all(|(f, s)| ascii_confusables.contains(&f) || ascii_confusables.contains(&s)); - confusable && found.to_lowercase() == suggested.to_lowercase() - // FIXME: We sometimes suggest the same thing we already have, which is a - // bug, but be defensive against that here. - && found != suggested + + let mut has_case_confusion = false; + let mut has_digit_letter_confusion = false; + + if found.len() == suggested.len() { + let mut has_case_diff = false; + let mut has_digit_letter_confusable = false; + let mut has_other_diff = false; + + let ascii_confusables = &['c', 'f', 'i', 'k', 'o', 's', 'u', 'v', 'w', 'x', 'y', 'z']; + + let digit_letter_confusables = [('0', 'O'), ('1', 'l'), ('5', 'S'), ('8', 'B'), ('9', 'g')]; + + for (f, s) in iter::zip(found.chars(), suggested.chars()) { + if f != s { + if f.to_lowercase().to_string() == s.to_lowercase().to_string() { + // Check for case differences (any character that differs only in case) + if ascii_confusables.contains(&f) || ascii_confusables.contains(&s) { + has_case_diff = true; + } else { + has_other_diff = true; + } + } else if digit_letter_confusables.contains(&(f, s)) + || digit_letter_confusables.contains(&(s, f)) + { + // Check for digit-letter confusables (like 0 vs O, 1 vs l, etc.) + has_digit_letter_confusable = true; + } else { + has_other_diff = true; + } + } + } + + // If we have case differences and no other differences + if has_case_diff && !has_other_diff && found != suggested { + has_case_confusion = true; + } + if has_digit_letter_confusable && !has_other_diff && found != suggested { + has_digit_letter_confusion = true; + } + } + + match (has_case_confusion, has_digit_letter_confusion) { + (true, true) => ConfusionType::Both, + (true, false) => ConfusionType::Case, + (false, true) => ConfusionType::DigitLetter, + (false, false) => ConfusionType::None, + } +} + +/// Represents the type of confusion detected between original and suggested code. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ConfusionType { + /// No confusion detected + None, + /// Only case differences (e.g., "hello" vs "Hello") + Case, + /// Only digit-letter confusion (e.g., "0" vs "O", "1" vs "l") + DigitLetter, + /// Both case and digit-letter confusion + Both, +} + +impl ConfusionType { + /// Returns the appropriate label text for this confusion type. + pub fn label_text(&self) -> &'static str { + match self { + ConfusionType::None => "", + ConfusionType::Case => " (notice the capitalization)", + ConfusionType::DigitLetter => " (notice the digit/letter confusion)", + ConfusionType::Both => " (notice the capitalization and digit/letter confusion)", + } + } + + /// Combines two confusion types. If either is `Both`, the result is `Both`. + /// If one is `Case` and the other is `DigitLetter`, the result is `Both`. + /// Otherwise, returns the non-`None` type, or `None` if both are `None`. + pub fn combine(self, other: ConfusionType) -> ConfusionType { + match (self, other) { + (ConfusionType::None, other) => other, + (this, ConfusionType::None) => this, + (ConfusionType::Both, _) | (_, ConfusionType::Both) => ConfusionType::Both, + (ConfusionType::Case, ConfusionType::DigitLetter) + | (ConfusionType::DigitLetter, ConfusionType::Case) => ConfusionType::Both, + (ConfusionType::Case, ConfusionType::Case) => ConfusionType::Case, + (ConfusionType::DigitLetter, ConfusionType::DigitLetter) => ConfusionType::DigitLetter, + } + } + + /// Returns true if this confusion type represents any kind of confusion. + pub fn has_confusion(&self) -> bool { + *self != ConfusionType::None + } } pub(crate) fn should_show_source_code( diff --git a/compiler/rustc_errors/src/lib.rs b/compiler/rustc_errors/src/lib.rs index 381d780077d..2534cddf105 100644 --- a/compiler/rustc_errors/src/lib.rs +++ b/compiler/rustc_errors/src/lib.rs @@ -50,7 +50,7 @@ pub use diagnostic_impls::{ IndicateAnonymousLifetime, SingleLabelManySpans, }; pub use emitter::ColorConfig; -use emitter::{DynEmitter, Emitter, is_case_difference, is_different}; +use emitter::{ConfusionType, DynEmitter, Emitter, detect_confusion_type, is_different}; use rustc_data_structures::AtomicRef; use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet}; use rustc_data_structures::stable_hasher::StableHasher; @@ -308,7 +308,7 @@ impl CodeSuggestion { pub(crate) fn splice_lines( &self, sm: &SourceMap, - ) -> Vec<(String, Vec<SubstitutionPart>, Vec<Vec<SubstitutionHighlight>>, bool)> { + ) -> Vec<(String, Vec<SubstitutionPart>, Vec<Vec<SubstitutionHighlight>>, ConfusionType)> { // For the `Vec<Vec<SubstitutionHighlight>>` value, the first level of the vector // corresponds to the output snippet's lines, while the second level corresponds to the // substrings within that line that should be highlighted. @@ -414,14 +414,15 @@ impl CodeSuggestion { // We need to keep track of the difference between the existing code and the added // or deleted code in order to point at the correct column *after* substitution. let mut acc = 0; - let mut only_capitalization = false; + let mut confusion_type = ConfusionType::None; for part in &mut substitution.parts { // If this is a replacement of, e.g. `"a"` into `"ab"`, adjust the // suggestion and snippet to look as if we just suggested to add // `"b"`, which is typically much easier for the user to understand. part.trim_trivial_replacements(sm); - only_capitalization |= is_case_difference(sm, &part.snippet, part.span); + let part_confusion = detect_confusion_type(sm, &part.snippet, part.span); + confusion_type = confusion_type.combine(part_confusion); let cur_lo = sm.lookup_char_pos(part.span.lo()); if prev_hi.line == cur_lo.line { let mut count = @@ -511,7 +512,7 @@ impl CodeSuggestion { if highlights.iter().all(|parts| parts.is_empty()) { None } else { - Some((buf, substitution.parts, highlights, only_capitalization)) + Some((buf, substitution.parts, highlights, confusion_type)) } }) .collect() |
