Auto merge of #46347 - raventid:did-you-mean-increase-accuracy, r=estebank

Add case insensitive comparison, besides Levenstein for DYM Closes #46332 Draft version. The idea is that Levenstein does not work for some cases when we have multiple equal weights for strings. I didn't understand the case with `if found != name => Some(found)` so it means that new code does not work correctly yet. At least now I think that we might return all maximal weights from levenstein and think about next cases in priority order: 1) There is exact match -> None 2) There is exact match, but case insensitive -> Some(match) 3) There is some match from levenstein -> Some(matches.take_any) 4) There is no match -> None @estebank WDYT?
author: bors <bors@rust-lang.org> 2017-12-02 12:42:54 +0000
committer: bors <bors@rust-lang.org> 2017-12-02 12:42:54 +0000
commit: 7a649872a917068951228192b7e14626bd7bd540 (patch)
tree: 6289cfeb5086df6f00b48a0b0c708ad0fde7bcd7 /src/libsyntax/util
parent: e0d11f39d81496743ffe2037b0ec09a573948f40 (diff)
parent: f18446e78de1e925b9849bd618e20ed471ad0e61 (diff)
download: rust-7a649872a917068951228192b7e14626bd7bd540.tar.gz
rust-7a649872a917068951228192b7e14626bd7bd540.zip
1 files changed, 27 insertions, 5 deletions
diff --git a/src/libsyntax/util/lev_distance.rs b/src/libsyntax/util/lev_distance.rs
index 9307f3c58d4..e429791f2bd 100644
--- a/src/libsyntax/util/lev_distance.rs
+++ b/src/libsyntax/util/lev_distance.rs
@@ -44,23 +44,45 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
 /// To find the best match for a given string from an iterator of names
 /// As a loose rule to avoid the obviously incorrect suggestions, it takes
 /// an optional limit for the maximum allowable edit distance, which defaults
-/// to one-third of the given word
+/// to one-third of the given word.
+/// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with
+/// a lower(upper)case letters mismatch.
 pub fn find_best_match_for_name<'a, T>(iter_names: T,
                                        lookup: &str,
                                        dist: Option<usize>) -> Option<Symbol>
     where T: Iterator<Item = &'a Symbol> {
     let max_dist = dist.map_or_else(|| cmp::max(lookup.len(), 3) / 3, |d| d);
-    iter_names
+
+    let (case_insensitive_match, levenstein_match) = iter_names
     .filter_map(|&name| {
         let dist = lev_distance(lookup, &name.as_str());
-        if dist <= max_dist {    // filter the unwanted cases
+        if dist <= max_dist {
             Some((name, dist))
         } else {
             None
         }
     })
-    .min_by_key(|&(_, val)| val)    // extract the tuple containing the minimum edit distance
-    .map(|(s, _)| s)                // and return only the string
+    // Here we are collecting the next structure:
+    // (case_insensitive_match, (levenstein_match, levenstein_distance))
+    .fold((None, None), |result, (candidate, dist)| {
+        (
+            if candidate.as_str().to_uppercase() == lookup.to_uppercase() {
+                Some(candidate)
+            } else {
+                result.0
+            },
+            match result.1 {
+                None => Some((candidate, dist)),
+                Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) })
+            }
+        )
+    });
+
+    if let Some(candidate) = case_insensitive_match {
+        Some(candidate) // exact case insensitive match has a higher priority
+    } else {
+        if let Some((candidate, _)) = levenstein_match { Some(candidate) } else { None }
+    }
 }
 
 #[test]
author	bors <bors@rust-lang.org>	2017-12-02 12:42:54 +0000
committer	bors <bors@rust-lang.org>	2017-12-02 12:42:54 +0000
commit	7a649872a917068951228192b7e14626bd7bd540 (patch)
tree	6289cfeb5086df6f00b48a0b0c708ad0fde7bcd7 /src/libsyntax/util
parent	e0d11f39d81496743ffe2037b0ec09a573948f40 (diff)
parent	f18446e78de1e925b9849bd618e20ed471ad0e61 (diff)
download	rust-7a649872a917068951228192b7e14626bd7bd540.tar.gz rust-7a649872a917068951228192b7e14626bd7bd540.zip