From 51ff17194828cfd2715eea2c7c19d38f7b889007 Mon Sep 17 00:00:00 2001 From: Ravi Shankar Date: Mon, 14 Dec 2015 22:36:31 +0530 Subject: Modify the Levenshtein-based suggestions to include imports --- src/libsyntax/util/lev_distance.rs | 54 ++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 20 deletions(-) (limited to 'src/libsyntax/util/lev_distance.rs') diff --git a/src/libsyntax/util/lev_distance.rs b/src/libsyntax/util/lev_distance.rs index 9bf96311122..e0796c34e57 100644 --- a/src/libsyntax/util/lev_distance.rs +++ b/src/libsyntax/util/lev_distance.rs @@ -8,50 +8,64 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use ast::Name; use std::cmp; +use parse::token::InternedString; -pub fn lev_distance(me: &str, t: &str) -> usize { - if me.is_empty() { return t.chars().count(); } - if t.is_empty() { return me.chars().count(); } +/// To find the Levenshtein distance between two strings +pub fn lev_distance(a: &str, b: &str) -> usize { + // cases which don't require further computation + if a.is_empty() { + return b.chars().count(); + } else if b.is_empty() { + return a.chars().count(); + } - let mut dcol: Vec<_> = (0..t.len() + 1).collect(); + let mut dcol: Vec<_> = (0..b.len() + 1).collect(); let mut t_last = 0; - for (i, sc) in me.chars().enumerate() { - + for (i, sc) in a.chars().enumerate() { let mut current = i; dcol[0] = current + 1; - for (j, tc) in t.chars().enumerate() { - + for (j, tc) in b.chars().enumerate() { let next = dcol[j + 1]; - if sc == tc { dcol[j + 1] = current; } else { dcol[j + 1] = cmp::min(current, next); dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1; } - current = next; t_last = j; } - } - - dcol[t_last + 1] + } dcol[t_last + 1] } -pub fn max_suggestion_distance(name: &str) -> usize { - use std::cmp::max; - // As a loose rule to avoid obviously incorrect suggestions, clamp the - // maximum edit distance we will accept for a suggestion to one third of - // the typo'd name's length. - max(name.len(), 3) / 3 +/// To find the best match for a given string from an iterator of names +/// As a loose rule to avoid the obviously incorrect suggestions, it takes +/// an optional limit for the maximum allowable edit distance, which defaults +/// to one-third of the given word +pub fn find_best_match_for_name<'a, T>(iter_names: T, + lookup: &str, + dist: Option) -> Option + where T: Iterator { + let max_dist = dist.map_or_else(|| cmp::max(lookup.len(), 3) / 3, |d| d); + iter_names + .filter_map(|name| { + let dist = lev_distance(lookup, &name.as_str()); + match dist <= max_dist { // filter the unwanted cases + true => Some((name.as_str(), dist)), + false => None, + } + }) + .min_by_key(|&(_, val)| val) // extract the tuple containing the minimum edit distance + .map(|(s, _)| s) // and return only the string } #[test] fn test_lev_distance() { - use std::char::{ from_u32, MAX }; + use std::char::{from_u32, MAX}; // Test bytelength agnosticity for c in (0..MAX as u32) .filter_map(|i| from_u32(i)) -- cgit 1.4.1-3-g733a5