about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMichael Sloan <mgsloan@gmail.com>2024-12-26 01:08:03 -0700
committerMichael Sloan <mgsloan@gmail.com>2024-12-26 01:28:37 -0700
commitbbc6242b4cfe3a80633874e01d606d8e35b85f31 (patch)
tree76a46815622d788a674547ef5a3331b1b0a0f607
parentb1c091c5738d0a11e10c89a238eabaada15c8c48 (diff)
downloadrust-bbc6242b4cfe3a80633874e01d606d8e35b85f31.tar.gz
rust-bbc6242b4cfe3a80633874e01d606d8e35b85f31.zip
Provide SCIP `external_symbols` + fix symbols provided with Document
Before this change `SymbolInformation` provided by a document was the
info for all encountered symbols that have not yet been emitted. So,
the symbol information on a `Document` was a mishmash of symbols
defined in the documents, symbols from other documents, and external
symbols.

After this change, the `SymbolInformation` on documents is just the
locals and defined symbols from the document.  All symbols referenced
and not from emitted documents are included in `external_symbols`.
-rw-r--r--src/tools/rust-analyzer/crates/rust-analyzer/src/cli/scip.rs204
1 files changed, 146 insertions, 58 deletions
diff --git a/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/scip.rs b/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/scip.rs
index 6b0aafc2482..6526fd965a9 100644
--- a/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/scip.rs
+++ b/src/tools/rust-analyzer/crates/rust-analyzer/src/cli/scip.rs
@@ -4,14 +4,15 @@ use std::{path::PathBuf, time::Instant};
 
 use ide::{
     AnalysisHost, LineCol, Moniker, MonikerDescriptorKind, MonikerIdentifier, MonikerResult,
-    StaticIndex, StaticIndexedFile, SymbolInformationKind, TextRange, TokenId, TokenStaticData,
-    VendoredLibrariesConfig,
+    RootDatabase, StaticIndex, StaticIndexedFile, SymbolInformationKind, TextRange, TokenId,
+    TokenStaticData, VendoredLibrariesConfig,
 };
 use ide_db::LineIndexDatabase;
 use load_cargo::{load_workspace_at, LoadCargoConfig, ProcMacroServerChoice};
 use rustc_hash::{FxHashMap, FxHashSet};
-use scip::types as scip_types;
+use scip::types::{self as scip_types, SymbolInformation};
 use tracing::error;
+use vfs::FileId;
 
 use crate::{
     cli::flags,
@@ -84,26 +85,40 @@ impl flags::Scip {
             text_document_encoding: scip_types::TextEncoding::UTF8.into(),
             special_fields: Default::default(),
         };
+
         let mut documents = Vec::new();
 
+        // All TokenIds where an Occurrence has been emitted that references a symbol.
+        let mut token_ids_referenced: FxHashSet<TokenId> = FxHashSet::default();
+        // All TokenIds where the SymbolInformation has been written to the document.
         let mut token_ids_emitted: FxHashSet<TokenId> = FxHashSet::default();
-        let mut global_symbols_emitted: FxHashSet<String> = FxHashSet::default();
-        let mut duplicate_symbols: Vec<(String, String)> = Vec::new();
+        // All FileIds emitted as documents.
+        let mut file_ids_emitted: FxHashSet<FileId> = FxHashSet::default();
+
+        // All non-local symbols encountered, for detecting duplicate symbol errors.
+        let mut nonlocal_symbols_emitted: FxHashSet<String> = FxHashSet::default();
+        // List of (source_location, symbol) for duplicate symbol errors to report.
+        let mut duplicate_symbol_errors: Vec<(String, String)> = Vec::new();
+        // This is called after definitions have been deduplicated by token_ids_emitted. The purpose
+        // is to detect reuse of symbol names because this causes ambiguity about their meaning.
+        let mut record_error_if_symbol_already_used =
+            |symbol: String, relative_path: &str, line_index: &LineIndex, text_range: TextRange| {
+                let is_local = symbol.starts_with("local ");
+                if !is_local && !nonlocal_symbols_emitted.insert(symbol.clone()) {
+                    let source_location =
+                        text_range_to_string(relative_path, line_index, text_range);
+                    duplicate_symbol_errors.push((source_location, symbol));
+                }
+            };
+
+        // Generates symbols from token monikers.
         let mut symbol_generator = SymbolGenerator::new();
 
         for StaticIndexedFile { file_id, tokens, .. } in si.files {
             symbol_generator.clear_document_local_state();
 
-            let relative_path = match get_relative_filepath(&vfs, &root, file_id) {
-                Some(relative_path) => relative_path,
-                None => continue,
-            };
-
-            let line_index = LineIndex {
-                index: db.line_index(file_id),
-                encoding: PositionEncoding::Utf8,
-                endings: LineEndings::Unix,
-            };
+            let Some(relative_path) = get_relative_filepath(&vfs, &root, file_id) else { continue };
+            let line_index = get_line_index(db, file_id);
 
             let mut occurrences = Vec::new();
             let mut symbols = Vec::new();
@@ -120,54 +135,45 @@ impl flags::Scip {
                         ("".to_owned(), None)
                     };
 
-                if !symbol.is_empty() && token_ids_emitted.insert(id) {
-                    if !symbol.starts_with("local ")
-                        && !global_symbols_emitted.insert(symbol.clone())
-                    {
-                        let source_location =
-                            text_range_to_string(relative_path.as_str(), &line_index, text_range);
-                        duplicate_symbols.push((source_location, symbol.clone()));
+                if !symbol.is_empty() {
+                    let is_defined_in_this_document = match token.definition {
+                        Some(def) => def.file_id == file_id,
+                        _ => false,
+                    };
+                    if is_defined_in_this_document {
+                        if token_ids_emitted.insert(id) {
+                            // token_ids_emitted does deduplication. This checks that this results
+                            // in unique emitted symbols, as otherwise references are ambiguous.
+                            record_error_if_symbol_already_used(
+                                symbol.clone(),
+                                relative_path.as_str(),
+                                &line_index,
+                                text_range,
+                            );
+                            symbols.push(compute_symbol_info(
+                                relative_path.clone(),
+                                symbol.clone(),
+                                enclosing_symbol,
+                                token,
+                            ));
+                        }
                     } else {
-                        let documentation = match &token.documentation {
-                            Some(doc) => vec![doc.as_str().to_owned()],
-                            None => vec![],
-                        };
-
-                        let position_encoding =
-                            scip_types::PositionEncoding::UTF8CodeUnitOffsetFromLineStart.into();
-                        let signature_documentation =
-                            token.signature.clone().map(|text| scip_types::Document {
-                                relative_path: relative_path.clone(),
-                                language: "rust".to_owned(),
-                                text,
-                                position_encoding,
-                                ..Default::default()
-                            });
-                        let symbol_info = scip_types::SymbolInformation {
-                            symbol: symbol.clone(),
-                            documentation,
-                            relationships: Vec::new(),
-                            special_fields: Default::default(),
-                            kind: symbol_kind(token.kind).into(),
-                            display_name: token.display_name.clone().unwrap_or_default(),
-                            signature_documentation: signature_documentation.into(),
-                            enclosing_symbol: enclosing_symbol.unwrap_or_default(),
-                        };
-
-                        symbols.push(symbol_info)
+                        token_ids_referenced.insert(id);
                     }
                 }
 
                 // If the range of the def and the range of the token are the same, this must be the definition.
                 // they also must be in the same file. See https://github.com/rust-lang/rust-analyzer/pull/17988
-                let mut symbol_roles = Default::default();
-                match token.definition {
-                    Some(def) if def.file_id == file_id && def.range == text_range => {
-                        symbol_roles |= scip_types::SymbolRole::Definition as i32;
-                    }
-                    _ => {}
+                let is_definition = match token.definition {
+                    Some(def) => def.file_id == file_id && def.range == text_range,
+                    _ => false,
                 };
 
+                let mut symbol_roles = Default::default();
+                if is_definition {
+                    symbol_roles |= scip_types::SymbolRole::Definition as i32;
+                }
+
                 occurrences.push(scip_types::Occurrence {
                     range: text_range_to_scip_range(&line_index, text_range),
                     symbol,
@@ -195,18 +201,61 @@ impl flags::Scip {
                 position_encoding,
                 special_fields: Default::default(),
             });
+            if !file_ids_emitted.insert(file_id) {
+                panic!("Invariant violation: file emitted multiple times.");
+            }
+        }
+
+        // Collect all symbols referenced by the files but not defined within them.
+        let mut external_symbols = Vec::new();
+        for id in token_ids_referenced.difference(&token_ids_emitted) {
+            let id = *id;
+            let token = si.tokens.get(id).unwrap();
+
+            let Some(definition) = token.definition else {
+                break;
+            };
+
+            let file_id = definition.file_id;
+            let Some(relative_path) = get_relative_filepath(&vfs, &root, file_id) else { continue };
+            let line_index = get_line_index(db, file_id);
+            let text_range = definition.range;
+            if file_ids_emitted.contains(&file_id) {
+                tracing::error!(
+                    "Bug: definition at {} should have been in an SCIP document but was not.",
+                    text_range_to_string(relative_path.as_str(), &line_index, text_range)
+                );
+                continue;
+            }
+
+            let TokenSymbols { symbol, enclosing_symbol } = symbol_generator
+                .token_symbols(id, token)
+                .expect("To have been referenced, the symbol must be in the cache.");
+
+            record_error_if_symbol_already_used(
+                symbol.clone(),
+                relative_path.as_str(),
+                &line_index,
+                text_range,
+            );
+            external_symbols.push(compute_symbol_info(
+                relative_path.clone(),
+                symbol.clone(),
+                enclosing_symbol,
+                token,
+            ));
         }
 
         let index = scip_types::Index {
             metadata: Some(metadata).into(),
             documents,
-            external_symbols: Vec::new(),
+            external_symbols,
             special_fields: Default::default(),
         };
 
-        if !duplicate_symbols.is_empty() {
+        if !duplicate_symbol_errors.is_empty() {
             eprintln!("{}", DUPLICATE_SYMBOLS_MESSAGE);
-            for (source_location, symbol) in duplicate_symbols {
+            for (source_location, symbol) in duplicate_symbol_errors {
                 eprintln!("{}", source_location);
                 eprintln!("  Duplicate symbol: {}", symbol);
                 eprintln!();
@@ -239,6 +288,37 @@ Known cases that can cause this:
 Duplicate symbols encountered:
 ";
 
+fn compute_symbol_info(
+    relative_path: String,
+    symbol: String,
+    enclosing_symbol: Option<String>,
+    token: &TokenStaticData,
+) -> SymbolInformation {
+    let documentation = match &token.documentation {
+        Some(doc) => vec![doc.as_str().to_owned()],
+        None => vec![],
+    };
+
+    let position_encoding = scip_types::PositionEncoding::UTF8CodeUnitOffsetFromLineStart.into();
+    let signature_documentation = token.signature.clone().map(|text| scip_types::Document {
+        relative_path,
+        language: "rust".to_owned(),
+        text,
+        position_encoding,
+        ..Default::default()
+    });
+    scip_types::SymbolInformation {
+        symbol,
+        documentation,
+        relationships: Vec::new(),
+        special_fields: Default::default(),
+        kind: symbol_kind(token.kind).into(),
+        display_name: token.display_name.clone().unwrap_or_default(),
+        signature_documentation: signature_documentation.into(),
+        enclosing_symbol: enclosing_symbol.unwrap_or_default(),
+    }
+}
+
 fn get_relative_filepath(
     vfs: &vfs::Vfs,
     rootpath: &vfs::AbsPathBuf,
@@ -247,6 +327,14 @@ fn get_relative_filepath(
     Some(vfs.file_path(file_id).as_path()?.strip_prefix(rootpath)?.as_str().to_owned())
 }
 
+fn get_line_index(db: &RootDatabase, file_id: FileId) -> LineIndex {
+    LineIndex {
+        index: db.line_index(file_id),
+        encoding: PositionEncoding::Utf8,
+        endings: LineEndings::Unix,
+    }
+}
+
 // SCIP Ranges have a (very large) optimization that ranges if they are on the same line
 // only encode as a vector of [start_line, start_col, end_col].
 //