diff options
| author | Yuki Okushi <huyuumi.dev@gmail.com> | 2020-01-09 00:29:12 +0900 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-01-09 00:29:12 +0900 |
| commit | 429a7e7522fcec73ce2cbbc1ef47688276ce08d5 (patch) | |
| tree | 74b807f9023ccc7b972b8dcf2a5df4265233aba3 /src/libsyntax | |
| parent | 256f401b85da7ae6e3d146a41089897c37c2589a (diff) | |
| parent | e01e8b9256587a074968b440aa30d43b31642cb5 (diff) | |
| download | rust-429a7e7522fcec73ce2cbbc1ef47688276ce08d5.tar.gz rust-429a7e7522fcec73ce2cbbc1ef47688276ce08d5.zip | |
Rollup merge of #67849 - cjkenn:check-sorted-words, r=estebank
Add a check for swapped words when we can't find an identifier. Fixes #66968. A couple of things here: 1. The matches take the precedence of case-insensitive match, then Levenshtein match, then swapped words match. Doing this allows us to not even check for swapped words unless the other checks return `None`. 2. I've assumed that the swapped words check is not held to the limits of the max Levenshtein distance threshold (i.e. we want to try and find a match even if the Levenshtein distance is very high). This means that we cannot perform this check in the `fold` that occurs after the `filter_map` call, because the candidate will be filtered out. So, I've split this into two separate `fold` calls, and had to collect the original iterator into a vec so it can be copied (I don't think we want to change the function signature to take a vec or require the `Copy` trait). An alternative implementation may be to remove the `filter_map`, `fold` over the entire iterator, and do a check against `max_dist` inside the relevant cases there. r? @estebank
Diffstat (limited to 'src/libsyntax')
| -rw-r--r-- | src/libsyntax/util/lev_distance.rs | 33 | ||||
| -rw-r--r-- | src/libsyntax/util/lev_distance/tests.rs | 6 |
2 files changed, 34 insertions, 5 deletions
diff --git a/src/libsyntax/util/lev_distance.rs b/src/libsyntax/util/lev_distance.rs index fc697026fe4..cce86fed989 100644 --- a/src/libsyntax/util/lev_distance.rs +++ b/src/libsyntax/util/lev_distance.rs @@ -54,14 +54,16 @@ where T: Iterator<Item = &'a Symbol>, { let max_dist = dist.map_or_else(|| cmp::max(lookup.len(), 3) / 3, |d| d); + let name_vec: Vec<&Symbol> = iter_names.collect(); - let (case_insensitive_match, levenstein_match) = iter_names + let (case_insensitive_match, levenshtein_match) = name_vec + .iter() .filter_map(|&name| { let dist = lev_distance(lookup, &name.as_str()); if dist <= max_dist { Some((name, dist)) } else { None } }) // Here we are collecting the next structure: - // (case_insensitive_match, (levenstein_match, levenstein_distance)) + // (case_insensitive_match, (levenshtein_match, levenshtein_distance)) .fold((None, None), |result, (candidate, dist)| { ( if candidate.as_str().to_uppercase() == lookup.to_uppercase() { @@ -75,10 +77,31 @@ where }, ) }); - + // Priority of matches: + // 1. Exact case insensitive match + // 2. Levenshtein distance match + // 3. 
Sorted word match if let Some(candidate) = case_insensitive_match { - Some(candidate) // exact case insensitive match has a higher priority + Some(*candidate) + } else if levenshtein_match.is_some() { + levenshtein_match.map(|(candidate, _)| *candidate) } else { - levenstein_match.map(|(candidate, _)| candidate) + find_match_by_sorted_words(name_vec, lookup) } } + +fn find_match_by_sorted_words<'a>(iter_names: Vec<&'a Symbol>, lookup: &str) -> Option<Symbol> { + iter_names.iter().fold(None, |result, candidate| { + if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) { + Some(**candidate) + } else { + result + } + }) +} + +fn sort_by_words(name: &str) -> String { + let mut split_words: Vec<&str> = name.split('_').collect(); + split_words.sort(); + split_words.join("_") +} diff --git a/src/libsyntax/util/lev_distance/tests.rs b/src/libsyntax/util/lev_distance/tests.rs index f65f9275d03..222661687c1 100644 --- a/src/libsyntax/util/lev_distance/tests.rs +++ b/src/libsyntax/util/lev_distance/tests.rs @@ -46,5 +46,11 @@ fn test_find_best_match_for_name() { find_best_match_for_name(input.iter(), "aaaa", Some(4)), Some(Symbol::intern("AAAA")) ); + + let input = vec![Symbol::intern("a_longer_variable_name")]; + assert_eq!( + find_best_match_for_name(input.iter(), "a_variable_longer_name", None), + Some(Symbol::intern("a_longer_variable_name")) + ); }) } |
