about summary refs log tree commit diff
path: root/compiler/rustc_span/src
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_span/src')
-rw-r--r--compiler/rustc_span/src/analyze_source_file.rs4
-rw-r--r--compiler/rustc_span/src/lev_distance.rs104
-rw-r--r--compiler/rustc_span/src/lev_distance/tests.rs56
-rw-r--r--compiler/rustc_span/src/lib.rs6
-rw-r--r--compiler/rustc_span/src/source_map.rs6
-rw-r--r--compiler/rustc_span/src/symbol.rs50
6 files changed, 203 insertions, 23 deletions
diff --git a/compiler/rustc_span/src/analyze_source_file.rs b/compiler/rustc_span/src/analyze_source_file.rs
index b4beb3dc376..5987fb2a198 100644
--- a/compiler/rustc_span/src/analyze_source_file.rs
+++ b/compiler/rustc_span/src/analyze_source_file.rs
@@ -97,7 +97,7 @@ cfg_if::cfg_if! {
                 let ptr = src_bytes.as_ptr() as *const __m128i;
                 // We don't know if the pointer is aligned to 16 bytes, so we
                 // use `loadu`, which supports unaligned loading.
-                let chunk = _mm_loadu_si128(ptr.offset(chunk_index as isize));
+                let chunk = _mm_loadu_si128(ptr.add(chunk_index));
 
                 // For character in the chunk, see if its byte value is < 0, which
                 // indicates that it's part of a UTF-8 char.
@@ -253,7 +253,7 @@ fn analyze_source_file_generic(
             let pos = BytePos::from_usize(i) + output_offset;
 
             if char_len > 1 {
-                assert!(char_len >= 2 && char_len <= 4);
+                assert!((2..=4).contains(&char_len));
                 let mbc = MultiByteChar { pos, bytes: char_len as u8 };
                 multi_byte_chars.push(mbc);
             }
diff --git a/compiler/rustc_span/src/lev_distance.rs b/compiler/rustc_span/src/lev_distance.rs
new file mode 100644
index 00000000000..edc6625a6ea
--- /dev/null
+++ b/compiler/rustc_span/src/lev_distance.rs
@@ -0,0 +1,104 @@
+use crate::symbol::Symbol;
+use std::cmp;
+
+#[cfg(test)]
+mod tests;
+
+/// Finds the Levenshtein distance between two strings
+pub fn lev_distance(a: &str, b: &str) -> usize {
+    // cases which don't require further computation
+    if a.is_empty() {
+        return b.chars().count();
+    } else if b.is_empty() {
+        return a.chars().count();
+    }
+
+    let mut dcol: Vec<_> = (0..=b.len()).collect();
+    let mut t_last = 0;
+
+    for (i, sc) in a.chars().enumerate() {
+        let mut current = i;
+        dcol[0] = current + 1;
+
+        for (j, tc) in b.chars().enumerate() {
+            let next = dcol[j + 1];
+            if sc == tc {
+                dcol[j + 1] = current;
+            } else {
+                dcol[j + 1] = cmp::min(current, next);
+                dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
+            }
+            current = next;
+            t_last = j;
+        }
+    }
+    dcol[t_last + 1]
+}
+
+/// Finds the best match for a given word in the given iterator
+///
+/// As a loose rule to avoid the obviously incorrect suggestions, it takes
+/// an optional limit for the maximum allowable edit distance, which defaults
+/// to one-third of the given word.
+///
+/// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with
+/// a lower(upper)case letters mismatch.
+#[cold]
+pub fn find_best_match_for_name(
+    name_vec: &[Symbol],
+    lookup: Symbol,
+    dist: Option<usize>,
+) -> Option<Symbol> {
+    let lookup = &lookup.as_str();
+    let max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
+
+    let (case_insensitive_match, levenshtein_match) = name_vec
+        .iter()
+        .filter_map(|&name| {
+            let dist = lev_distance(lookup, &name.as_str());
+            if dist <= max_dist { Some((name, dist)) } else { None }
+        })
+        // Here we are collecting the next structure:
+        // (case_insensitive_match, (levenshtein_match, levenshtein_distance))
+        .fold((None, None), |result, (candidate, dist)| {
+            (
+                if candidate.as_str().to_uppercase() == lookup.to_uppercase() {
+                    Some(candidate)
+                } else {
+                    result.0
+                },
+                match result.1 {
+                    None => Some((candidate, dist)),
+                    Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }),
+                },
+            )
+        });
+    // Priority of matches:
+    // 1. Exact case insensitive match
+    // 2. Levenshtein distance match
+    // 3. Sorted word match
+    if let Some(candidate) = case_insensitive_match {
+        Some(candidate)
+    } else if levenshtein_match.is_some() {
+        levenshtein_match.map(|(candidate, _)| candidate)
+    } else {
+        find_match_by_sorted_words(name_vec, lookup)
+    }
+}
+
+fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> {
+    iter_names.iter().fold(None, |result, candidate| {
+        if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) {
+            Some(*candidate)
+        } else {
+            result
+        }
+    })
+}
+
+fn sort_by_words(name: &str) -> String {
+    let mut split_words: Vec<&str> = name.split('_').collect();
+    // We are sorting primitive &strs and can use unstable sort here
+    split_words.sort_unstable();
+    split_words.join("_")
+}
diff --git a/compiler/rustc_span/src/lev_distance/tests.rs b/compiler/rustc_span/src/lev_distance/tests.rs
new file mode 100644
index 00000000000..7aa01cb8efe
--- /dev/null
+++ b/compiler/rustc_span/src/lev_distance/tests.rs
@@ -0,0 +1,56 @@
+use super::*;
+
+#[test]
+fn test_lev_distance() {
+    use std::char::{from_u32, MAX};
+    // Test bytelength agnosticity
+    for c in (0..MAX as u32).filter_map(|i| from_u32(i)).map(|i| i.to_string()) {
+        assert_eq!(lev_distance(&c[..], &c[..]), 0);
+    }
+
+    let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
+    let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
+    let c = "Mary häd ä little lämb\n\nLittle lämb\n";
+    assert_eq!(lev_distance(a, b), 1);
+    assert_eq!(lev_distance(b, a), 1);
+    assert_eq!(lev_distance(a, c), 2);
+    assert_eq!(lev_distance(c, a), 2);
+    assert_eq!(lev_distance(b, c), 1);
+    assert_eq!(lev_distance(c, b), 1);
+}
+
+#[test]
+fn test_find_best_match_for_name() {
+    use crate::with_default_session_globals;
+    with_default_session_globals(|| {
+        let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")];
+        assert_eq!(
+            find_best_match_for_name(&input, Symbol::intern("aaaa"), None),
+            Some(Symbol::intern("aaab"))
+        );
+
+        assert_eq!(find_best_match_for_name(&input, Symbol::intern("1111111111"), None), None);
+
+        let input = vec![Symbol::intern("aAAA")];
+        assert_eq!(
+            find_best_match_for_name(&input, Symbol::intern("AAAA"), None),
+            Some(Symbol::intern("aAAA"))
+        );
+
+        let input = vec![Symbol::intern("AAAA")];
+        // Returns None because `lev_distance > max_dist / 3`
+        assert_eq!(find_best_match_for_name(&input, Symbol::intern("aaaa"), None), None);
+
+        let input = vec![Symbol::intern("AAAA")];
+        assert_eq!(
+            find_best_match_for_name(&input, Symbol::intern("aaaa"), Some(4)),
+            Some(Symbol::intern("AAAA"))
+        );
+
+        let input = vec![Symbol::intern("a_longer_variable_name")];
+        assert_eq!(
+            find_best_match_for_name(&input, Symbol::intern("a_variable_longer_name"), None),
+            Some(Symbol::intern("a_longer_variable_name"))
+        );
+    })
+}
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs
index 0926561f4c5..f63a73acbf4 100644
--- a/compiler/rustc_span/src/lib.rs
+++ b/compiler/rustc_span/src/lib.rs
@@ -34,6 +34,7 @@ use hygiene::Transparency;
 pub use hygiene::{DesugaringKind, ExpnData, ExpnId, ExpnKind, ForLoopLoc, MacroKind};
 pub mod def_id;
 use def_id::{CrateNum, DefId, LOCAL_CRATE};
+pub mod lev_distance;
 mod span_encoding;
 pub use span_encoding::{Span, DUMMY_SP};
 
@@ -1014,10 +1015,7 @@ pub enum ExternalSourceKind {
 
 impl ExternalSource {
     pub fn is_absent(&self) -> bool {
-        match self {
-            ExternalSource::Foreign { kind: ExternalSourceKind::Present(_), .. } => false,
-            _ => true,
-        }
+        !matches!(self, ExternalSource::Foreign { kind: ExternalSourceKind::Present(_), .. })
     }
 
     pub fn get_source(&self) -> Option<&Lrc<String>> {
diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs
index f067cdb7308..e9b4eb6e4ab 100644
--- a/compiler/rustc_span/src/source_map.rs
+++ b/compiler/rustc_span/src/source_map.rs
@@ -623,7 +623,7 @@ impl SourceMap {
         self.span_to_source(sp, |src, start_index, end_index| {
             src.get(start_index..end_index)
                 .map(|s| s.to_string())
-                .ok_or_else(|| SpanSnippetError::IllFormedSpan(sp))
+                .ok_or(SpanSnippetError::IllFormedSpan(sp))
         })
     }
 
@@ -640,9 +640,7 @@ impl SourceMap {
     /// Returns the source snippet as `String` before the given `Span`.
     pub fn span_to_prev_source(&self, sp: Span) -> Result<String, SpanSnippetError> {
         self.span_to_source(sp, |src, start_index, _| {
-            src.get(..start_index)
-                .map(|s| s.to_string())
-                .ok_or_else(|| SpanSnippetError::IllFormedSpan(sp))
+            src.get(..start_index).map(|s| s.to_string()).ok_or(SpanSnippetError::IllFormedSpan(sp))
         })
     }
 
diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 3a2a3adce35..b60d466c3a7 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -267,6 +267,7 @@ symbols! {
         asm,
         assert,
         assert_inhabited,
+        assert_macro,
         assert_receiver_is_total_eq,
         assert_uninit_valid,
         assert_zero_valid,
@@ -284,6 +285,7 @@ symbols! {
         attr_literals,
         attributes,
         augmented_assignments,
+        auto_traits,
         automatically_derived,
         avx512_target_feature,
         await_macro,
@@ -354,6 +356,7 @@ symbols! {
         concat_idents,
         conservative_impl_trait,
         console,
+        const_allocate,
         const_compare_raw_pointers,
         const_constructor,
         const_eval_limit,
@@ -393,6 +396,7 @@ symbols! {
         copysignf64,
         core,
         core_intrinsics,
+        core_panic_macro,
         cosf32,
         cosf64,
         crate_id,
@@ -416,6 +420,7 @@ symbols! {
         dead_code,
         dealloc,
         debug,
+        debug_assert_macro,
         debug_assertions,
         debug_struct,
         debug_trait,
@@ -455,6 +460,9 @@ symbols! {
         document_private_items,
         dotdot_in_tuple_patterns,
         dotdoteq_in_patterns,
+        dreg,
+        dreg_low16,
+        dreg_low8,
         drop,
         drop_in_place,
         drop_types_in_const,
@@ -491,6 +499,7 @@ symbols! {
         expf64,
         export_name,
         expr,
+        extended_key_value_attributes,
         extern_absolute_paths,
         extern_crate_item_prelude,
         extern_crate_self,
@@ -538,6 +547,7 @@ symbols! {
         format_args_capture,
         format_args_nl,
         freeze,
+        freg,
         frem_fast,
         from,
         from_desugaring,
@@ -621,6 +631,7 @@ symbols! {
         iter,
         keyword,
         kind,
+        kreg,
         label,
         label_break_value,
         lang,
@@ -646,6 +657,7 @@ symbols! {
         lint_reasons,
         literal,
         llvm_asm,
+        local,
         local_inner_macros,
         log10f32,
         log10f64,
@@ -789,6 +801,7 @@ symbols! {
         panic_runtime,
         panic_str,
         panic_unwind,
+        panicking,
         param_attrs,
         parent_trait,
         partial_cmp,
@@ -847,6 +860,9 @@ symbols! {
         pub_restricted,
         pure,
         pushpop_unsafe,
+        qreg,
+        qreg_low4,
+        qreg_low8,
         quad_precision_float,
         question_mark,
         quote,
@@ -868,6 +884,13 @@ symbols! {
         reexport_test_harness_main,
         reference,
         reflect,
+        reg,
+        reg16,
+        reg32,
+        reg64,
+        reg_abcd,
+        reg_byte,
+        reg_thumb,
         register_attr,
         register_tool,
         relaxed_adts,
@@ -1053,6 +1076,8 @@ symbols! {
         spotlight,
         sqrtf32,
         sqrtf64,
+        sreg,
+        sreg_low16,
         sse4a_target_feature,
         stable,
         staged_api,
@@ -1064,6 +1089,7 @@ symbols! {
         staticlib,
         std,
         std_inject,
+        std_panic_macro,
         stmt,
         stmt_expr_attributes,
         stop_after_dataflow,
@@ -1207,6 +1233,8 @@ symbols! {
         volatile_load,
         volatile_set_memory,
         volatile_store,
+        vreg,
+        vreg_low16,
         warn,
         wasm_import_module,
         wasm_target_feature,
@@ -1218,6 +1246,9 @@ symbols! {
         wrapping_mul,
         wrapping_sub,
         write_bytes,
+        xmm_reg,
+        ymm_reg,
+        zmm_reg,
     }
 }
 
@@ -1356,15 +1387,13 @@ impl fmt::Display for IdentPrinter {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         if self.is_raw {
             f.write_str("r#")?;
-        } else {
-            if self.symbol == kw::DollarCrate {
-                if let Some(span) = self.convert_dollar_crate {
-                    let converted = span.ctxt().dollar_crate_name();
-                    if !converted.is_path_segment_keyword() {
-                        f.write_str("::")?;
-                    }
-                    return fmt::Display::fmt(&converted, f);
+        } else if self.symbol == kw::DollarCrate {
+            if let Some(span) = self.convert_dollar_crate {
+                let converted = span.ctxt().dollar_crate_name();
+                if !converted.is_path_segment_keyword() {
+                    f.write_str("::")?;
                 }
+                return fmt::Display::fmt(&converted, f);
             }
         }
         fmt::Display::fmt(&self.symbol, f)
@@ -1584,11 +1613,6 @@ impl Symbol {
         self == kw::Try
     }
 
-    /// Used for sanity checking rustdoc keyword sections.
-    pub fn is_doc_keyword(self) -> bool {
-        self <= kw::Union
-    }
-
     /// A keyword or reserved identifier that can be used as a path segment.
     pub fn is_path_segment_keyword(self) -> bool {
         self == kw::Super