about summary refs log tree commit diff
path: root/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
diff options
context:
space:
mode:
authorMatthias Krüger <matthias.krueger@famsik.de>2023-10-23 08:12:39 +0200
committerGitHub <noreply@github.com>2023-10-23 08:12:39 +0200
commitdde77f7a33ab64115102ba7d4bfd6ab9bb46253e (patch)
tree323457aaae0fe169e4eb71334558dd45ef224828 /compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
parente86f9b6fceda43cecd839e288f40622eadd9e639 (diff)
parent6af9fef08590fd499370a2f6cbbae9ceacf15336 (diff)
downloadrust-dde77f7a33ab64115102ba7d4bfd6ab9bb46253e.tar.gz
rust-dde77f7a33ab64115102ba7d4bfd6ab9bb46253e.zip
Rollup merge of #117042 - Zalathar:file-table, r=cjgillot
coverage: Emit the filenames section before encoding per-function mappings

When embedding coverage information in LLVM IR (and ultimately in the resulting binary), there are two main things that each CGU needs to emit:

- A single `__llvm_covmap` record containing a coverage header, which mostly consists of a list of filenames used by the CGU's coverage mappings.
- Several `__llvm_covfun` records, one for each instrumented function, each of which contains the hash of the list of filenames in the header.

There is a kind of loose cyclic dependency between the two: we need the hash of the file table before we can emit the covfun records, but we need to traverse all of the instrumented functions in order to build the file table.

The existing code works by processing the individual functions first. It lazily adds filenames to the file table, and stores the mostly-complete function records in a temporary list. After this it hashes the file table, emits the header (containing the file table), and then uses the hash to emit all of the function records.

This PR reverses that order: first we traverse all of the functions (without trying to prepare their function records) to build a *complete* file table, and then emit it immediately. At this point we have the file table hash, so we can then proceed to build and emit all of the function records, without needing to store them in an intermediate list.

---

Along the way, this PR makes some necessary changes that are also worthwhile in their own right:
- We split `FunctionCoverage` into distinct collector/finished phases, which neatly avoids some borrow-checker hassles when extracting a function's final expression/mapping data.
- We avoid having to re-sort a function's mappings when preparing the list of filenames that it uses.
Diffstat (limited to 'compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs')
-rw-r--r--compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs162
1 files changed, 100 insertions, 62 deletions
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
index 2f825b801ac..274e0aeaaba 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -1,11 +1,12 @@
 use crate::common::CodegenCx;
 use crate::coverageinfo;
 use crate::coverageinfo::ffi::CounterMappingRegion;
-use crate::coverageinfo::map_data::FunctionCoverage;
+use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector};
 use crate::llvm;
 
+use itertools::Itertools as _;
 use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods};
-use rustc_data_structures::fx::FxIndexSet;
+use rustc_data_structures::fx::{FxIndexMap, FxIndexSet};
 use rustc_hir::def::DefKind;
 use rustc_hir::def_id::DefId;
 use rustc_index::IndexVec;
@@ -57,11 +58,32 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
         return;
     }
 
-    let mut global_file_table = GlobalFileTable::new(tcx);
+    let function_coverage_entries = function_coverage_map
+        .into_iter()
+        .map(|(instance, function_coverage)| (instance, function_coverage.into_finished()))
+        .collect::<Vec<_>>();
+
+    let all_file_names =
+        function_coverage_entries.iter().flat_map(|(_, fn_cov)| fn_cov.all_file_names());
+    let global_file_table = GlobalFileTable::new(all_file_names);
+
+    // Encode all filenames referenced by coverage mappings in this CGU.
+    let filenames_buffer = global_file_table.make_filenames_buffer(tcx);
+
+    let filenames_size = filenames_buffer.len();
+    let filenames_val = cx.const_bytes(&filenames_buffer);
+    let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer);
+
+    // Generate the coverage map header, which contains the filenames used by
+    // this CGU's coverage mappings, and store it in a well-known global.
+    let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val);
+    coverageinfo::save_cov_data_to_mod(cx, cov_data_val);
+
+    let mut unused_function_names = Vec::new();
+    let covfun_section_name = coverageinfo::covfun_section_name(cx);
 
     // Encode coverage mappings and generate function records
-    let mut function_data = Vec::new();
-    for (instance, function_coverage) in function_coverage_map {
+    for (instance, function_coverage) in function_coverage_entries {
         debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
 
         let mangled_function_name = tcx.symbol_name(instance).name;
@@ -69,7 +91,7 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
         let is_used = function_coverage.is_used();
 
         let coverage_mapping_buffer =
-            encode_mappings_for_function(&mut global_file_table, &function_coverage);
+            encode_mappings_for_function(&global_file_table, &function_coverage);
 
         if coverage_mapping_buffer.is_empty() {
             if function_coverage.is_used() {
@@ -83,23 +105,6 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
             }
         }
 
-        function_data.push((mangled_function_name, source_hash, is_used, coverage_mapping_buffer));
-    }
-
-    // Encode all filenames referenced by counters/expressions in this module
-    let filenames_buffer = global_file_table.into_filenames_buffer();
-
-    let filenames_size = filenames_buffer.len();
-    let filenames_val = cx.const_bytes(&filenames_buffer);
-    let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer);
-
-    // Generate the LLVM IR representation of the coverage map and store it in a well-known global
-    let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val);
-
-    let mut unused_function_names = Vec::new();
-
-    let covfun_section_name = coverageinfo::covfun_section_name(cx);
-    for (mangled_function_name, source_hash, is_used, coverage_mapping_buffer) in function_data {
         if !is_used {
             unused_function_names.push(mangled_function_name);
         }
@@ -133,92 +138,125 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
         llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
         llvm::set_initializer(array, initializer);
     }
-
-    // Save the coverage data value to LLVM IR
-    coverageinfo::save_cov_data_to_mod(cx, cov_data_val);
 }
 
+/// Maps "global" (per-CGU) file ID numbers to their underlying filenames.
 struct GlobalFileTable {
-    global_file_table: FxIndexSet<Symbol>,
+    /// This "raw" table doesn't include the working dir, so a filename's
+    /// global ID is its index in this set **plus one**.
+    raw_file_table: FxIndexSet<Symbol>,
 }
 
 impl GlobalFileTable {
-    fn new(tcx: TyCtxt<'_>) -> Self {
-        let mut global_file_table = FxIndexSet::default();
+    fn new(all_file_names: impl IntoIterator<Item = Symbol>) -> Self {
+        // Collect all of the filenames into a set. Filenames usually come in
+        // contiguous runs, so we can dedup adjacent ones to save work.
+        let mut raw_file_table = all_file_names.into_iter().dedup().collect::<FxIndexSet<Symbol>>();
+
+        // Sort the file table by its actual string values, not the arbitrary
+        // ordering of its symbols.
+        raw_file_table.sort_unstable_by(|a, b| a.as_str().cmp(b.as_str()));
+
+        Self { raw_file_table }
+    }
+
+    fn global_file_id_for_file_name(&self, file_name: Symbol) -> u32 {
+        let raw_id = self.raw_file_table.get_index_of(&file_name).unwrap_or_else(|| {
+            bug!("file name not found in prepared global file table: {file_name}");
+        });
+        // The raw file table doesn't include an entry for the working dir
+        // (which has ID 0), so add 1 to get the correct ID.
+        (raw_id + 1) as u32
+    }
+
+    fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> {
         // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
         // requires setting the first filename to the compilation directory.
         // Since rustc generates coverage maps with relative paths, the
         // compilation directory can be combined with the relative paths
         // to get absolute paths, if needed.
         use rustc_session::RemapFileNameExt;
-        let working_dir =
-            Symbol::intern(&tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy());
-        global_file_table.insert(working_dir);
-        Self { global_file_table }
-    }
-
-    fn global_file_id_for_file_name(&mut self, file_name: Symbol) -> u32 {
-        let (global_file_id, _) = self.global_file_table.insert_full(file_name);
-        global_file_id as u32
-    }
-
-    fn into_filenames_buffer(self) -> Vec<u8> {
-        // This method takes `self` so that the caller can't accidentally
-        // modify the original file table after encoding it into a buffer.
+        let working_dir: &str = &tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy();
 
         llvm::build_byte_buffer(|buffer| {
             coverageinfo::write_filenames_section_to_buffer(
-                self.global_file_table.iter().map(Symbol::as_str),
+                // Insert the working dir at index 0, before the other filenames.
+                std::iter::once(working_dir).chain(self.raw_file_table.iter().map(Symbol::as_str)),
                 buffer,
             );
         })
     }
 }
 
+rustc_index::newtype_index! {
+    // Tell the newtype macro to not generate `Encode`/`Decode` impls.
+    #[custom_encodable]
+    struct LocalFileId {}
+}
+
+/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU)
+/// file IDs.
+#[derive(Default)]
+struct VirtualFileMapping {
+    local_to_global: IndexVec<LocalFileId, u32>,
+    global_to_local: FxIndexMap<u32, LocalFileId>,
+}
+
+impl VirtualFileMapping {
+    fn local_id_for_global(&mut self, global_file_id: u32) -> LocalFileId {
+        *self
+            .global_to_local
+            .entry(global_file_id)
+            .or_insert_with(|| self.local_to_global.push(global_file_id))
+    }
+
+    fn into_vec(self) -> Vec<u32> {
+        self.local_to_global.raw
+    }
+}
+
 /// Using the expressions and counter regions collected for a single function,
 /// generate the variable-sized payload of its corresponding `__llvm_covfun`
 /// entry. The payload is returned as a vector of bytes.
 ///
 /// Newly-encountered filenames will be added to the global file table.
 fn encode_mappings_for_function(
-    global_file_table: &mut GlobalFileTable,
+    global_file_table: &GlobalFileTable,
     function_coverage: &FunctionCoverage<'_>,
 ) -> Vec<u8> {
-    let (expressions, counter_regions) = function_coverage.get_expressions_and_counter_regions();
-
-    let mut counter_regions = counter_regions.collect::<Vec<_>>();
+    let counter_regions = function_coverage.counter_regions();
     if counter_regions.is_empty() {
         return Vec::new();
     }
 
-    let mut virtual_file_mapping = IndexVec::<u32, u32>::new();
+    let expressions = function_coverage.counter_expressions().collect::<Vec<_>>();
+
+    let mut virtual_file_mapping = VirtualFileMapping::default();
     let mut mapping_regions = Vec::with_capacity(counter_regions.len());
 
-    // Sort and group the list of (counter, region) mapping pairs by filename.
-    // (Preserve any further ordering imposed by `FunctionCoverage`.)
+    // Group mappings into runs with the same filename, preserving the order
+    // yielded by `FunctionCoverage`.
     // Prepare file IDs for each filename, and prepare the mapping data so that
     // we can pass it through FFI to LLVM.
-    counter_regions.sort_by_key(|(_counter, region)| region.file_name);
-    for counter_regions_for_file in
-        counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name)
+    for (file_name, counter_regions_for_file) in
+        &counter_regions.group_by(|(_counter, region)| region.file_name)
     {
-        // Look up (or allocate) the global file ID for this filename.
-        let file_name = counter_regions_for_file[0].1.file_name;
+        // Look up the global file ID for this filename.
         let global_file_id = global_file_table.global_file_id_for_file_name(file_name);
 
         // Associate that global file ID with a local file ID for this function.
-        let local_file_id: u32 = virtual_file_mapping.push(global_file_id);
-        debug!("  file id: local {local_file_id} => global {global_file_id} = '{file_name:?}'");
+        let local_file_id = virtual_file_mapping.local_id_for_global(global_file_id);
+        debug!("  file id: {local_file_id:?} => global {global_file_id} = '{file_name:?}'");
 
         // For each counter/region pair in this function+file, convert it to a
         // form suitable for FFI.
-        for &(counter, region) in counter_regions_for_file {
+        for (counter, region) in counter_regions_for_file {
             let CodeRegion { file_name: _, start_line, start_col, end_line, end_col } = *region;
 
             debug!("Adding counter {counter:?} to map for {region:?}");
             mapping_regions.push(CounterMappingRegion::code_region(
                 counter,
-                local_file_id,
+                local_file_id.as_u32(),
                 start_line,
                 start_col,
                 end_line,
@@ -230,7 +268,7 @@ fn encode_mappings_for_function(
     // Encode the function's coverage mappings into a buffer.
     llvm::build_byte_buffer(|buffer| {
         coverageinfo::write_mapping_to_buffer(
-            virtual_file_mapping.raw,
+            virtual_file_mapping.into_vec(),
             expressions,
             mapping_regions,
             buffer,
@@ -419,7 +457,7 @@ fn add_unused_function_coverage<'tcx>(
 ) {
     // An unused function's mappings will automatically be rewritten to map to
     // zero, because none of its counters/expressions are marked as seen.
-    let function_coverage = FunctionCoverage::unused(instance, function_coverage_info);
+    let function_coverage = FunctionCoverageCollector::unused(instance, function_coverage_info);
 
     if let Some(coverage_context) = cx.coverage_context() {
         coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage);