diff options
| author | bors <bors@rust-lang.org> | 2017-12-02 12:42:54 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2017-12-02 12:42:54 +0000 |
| commit | 7a649872a917068951228192b7e14626bd7bd540 (patch) | |
| tree | 6289cfeb5086df6f00b48a0b0c708ad0fde7bcd7 /src/libsyntax/util | |
| parent | e0d11f39d81496743ffe2037b0ec09a573948f40 (diff) | |
| parent | f18446e78de1e925b9849bd618e20ed471ad0e61 (diff) | |
| download | rust-7a649872a917068951228192b7e14626bd7bd540.tar.gz rust-7a649872a917068951228192b7e14626bd7bd540.zip | |
Auto merge of #46347 - raventid:did-you-mean-increase-accuracy, r=estebank
Add case insensitive comparison, besides Levenstein for DYM Closes #46332 Draft version. The idea is that Levenstein does not work for some cases when we have multiple equal weights for strings. I didn't understand the case with `if found != name => Some(found)` so it means that new code does not work correctly yet. At least now I think that we might return all maximal weights from levenstein and think about next cases in priority order: 1) There is exact match -> None 2) There is exact match, but case insensitive -> Some(match) 3) There is some match from levenstein -> Some(matches.take_any) 4) There is no match -> None @estebank WDYT?
Diffstat (limited to 'src/libsyntax/util')
| -rw-r--r-- | src/libsyntax/util/lev_distance.rs | 32 |
1 files changed, 27 insertions, 5 deletions
diff --git a/src/libsyntax/util/lev_distance.rs b/src/libsyntax/util/lev_distance.rs index 9307f3c58d4..e429791f2bd 100644 --- a/src/libsyntax/util/lev_distance.rs +++ b/src/libsyntax/util/lev_distance.rs @@ -44,23 +44,45 @@ pub fn lev_distance(a: &str, b: &str) -> usize { /// To find the best match for a given string from an iterator of names /// As a loose rule to avoid the obviously incorrect suggestions, it takes /// an optional limit for the maximum allowable edit distance, which defaults -/// to one-third of the given word +/// to one-third of the given word. +/// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with +/// a lower(upper)case letters mismatch. pub fn find_best_match_for_name<'a, T>(iter_names: T, lookup: &str, dist: Option<usize>) -> Option<Symbol> where T: Iterator<Item = &'a Symbol> { let max_dist = dist.map_or_else(|| cmp::max(lookup.len(), 3) / 3, |d| d); - iter_names + + let (case_insensitive_match, levenstein_match) = iter_names .filter_map(|&name| { let dist = lev_distance(lookup, &name.as_str()); - if dist <= max_dist { // filter the unwanted cases + if dist <= max_dist { Some((name, dist)) } else { None } }) - .min_by_key(|&(_, val)| val) // extract the tuple containing the minimum edit distance - .map(|(s, _)| s) // and return only the string + // Here we are collecting the next structure: + // (case_insensitive_match, (levenstein_match, levenstein_distance)) + .fold((None, None), |result, (candidate, dist)| { + ( + if candidate.as_str().to_uppercase() == lookup.to_uppercase() { + Some(candidate) + } else { + result.0 + }, + match result.1 { + None => Some((candidate, dist)), + Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }) + } + ) + }); + + if let Some(candidate) = case_insensitive_match { + Some(candidate) // exact case insensitive match has a higher priority + } else { + if let Some((candidate, _)) = levenstein_match { Some(candidate) } else { None } + } } #[test] |
