diff options
Diffstat (limited to 'compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs')
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs | 299 |
1 files changed, 84 insertions, 215 deletions
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index 9a2473d6cf2..55b1e728b70 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -5,15 +5,11 @@ use rustc_abi::Align; use rustc_codegen_ssa::traits::{ BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods, }; -use rustc_data_structures::fx::{FxHashSet, FxIndexMap}; -use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_data_structures::fx::FxIndexMap; use rustc_index::IndexVec; -use rustc_middle::mir; -use rustc_middle::mir::mono::MonoItemPartitions; -use rustc_middle::ty::{self, TyCtxt}; +use rustc_middle::ty::TyCtxt; use rustc_session::RemapFileNameExt; use rustc_session::config::RemapPathScopeComponents; -use rustc_span::def_id::DefIdSet; use rustc_span::{SourceFile, StableSourceFileId}; use tracing::debug; @@ -24,6 +20,7 @@ use crate::llvm; mod covfun; mod spans; +mod unused; /// Generates and exports the coverage map, which is embedded in special /// linker sections in the final binary. @@ -56,13 +53,6 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) { None => return, }; - // The order of entries in this global file table needs to be deterministic, - // and ideally should also be independent of the details of stable-hashing, - // because coverage tests snapshots (`.cov-map`) can observe the order and - // would need to be re-blessed if it changes. As long as those requirements - // are satisfied, the order can be arbitrary. - let mut global_file_table = GlobalFileTable::new(); - let mut covfun_records = instances_used .iter() .copied() @@ -70,18 +60,13 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) { // order that doesn't depend on the stable-hash-based order in which // instances were visited during codegen. .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name) - .filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true)) + .filter_map(|instance| prepare_covfun_record(tcx, instance, true)) .collect::<Vec<_>>(); // In a single designated CGU, also prepare covfun records for functions // in this crate that were instrumented for coverage, but are unused. if cx.codegen_unit.is_code_coverage_dead_code_cgu() { - let mut unused_instances = gather_unused_function_instances(cx); - // Sort the unused instances by symbol name, for the same reason as the used ones. - unused_instances.sort_by_cached_key(|&instance| tcx.symbol_name(instance).name); - covfun_records.extend(unused_instances.into_iter().filter_map(|instance| { - prepare_covfun_record(tcx, &mut global_file_table, instance, false) - })); + unused::prepare_covfun_records_for_unused_functions(cx, &mut covfun_records); } // If there are no covfun records for this CGU, don't generate a covmap record. @@ -93,91 +78,88 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) { return; } - // Encode all filenames referenced by coverage mappings in this CGU. - let filenames_buffer = global_file_table.make_filenames_buffer(tcx); - // The `llvm-cov` tool uses this hash to associate each covfun record with - // its corresponding filenames table, since the final binary will typically - // contain multiple covmap records from different compilation units. - let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer); - - let mut unused_function_names = vec![]; + // Prepare the global file table for this CGU, containing all paths needed + // by one or more covfun records. + let global_file_table = + GlobalFileTable::build(tcx, covfun_records.iter().flat_map(|c| c.all_source_files())); for covfun in &covfun_records { - unused_function_names.extend(covfun.mangled_function_name_if_unused()); - - covfun::generate_covfun_record(cx, filenames_hash, covfun) - } - - // For unused functions, we need to take their mangled names and store them - // in a specially-named global array. LLVM's `InstrProfiling` pass will - // detect this global and include those names in its `__llvm_prf_names` - // section. (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.) - if !unused_function_names.is_empty() { - assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); - - let name_globals = unused_function_names - .into_iter() - .map(|mangled_function_name| cx.const_str(mangled_function_name).0) - .collect::<Vec<_>>(); - let initializer = cx.const_array(cx.type_ptr(), &name_globals); - - let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), c"__llvm_coverage_names"); - llvm::set_global_constant(array, true); - llvm::set_linkage(array, llvm::Linkage::InternalLinkage); - llvm::set_initializer(array, initializer); + covfun::generate_covfun_record(cx, &global_file_table, covfun) } // Generate the coverage map header, which contains the filenames used by // this CGU's coverage mappings, and store it in a well-known global. // (This is skipped if we returned early due to having no covfun records.) - generate_covmap_record(cx, covmap_version, &filenames_buffer); + generate_covmap_record(cx, covmap_version, &global_file_table.filenames_buffer); } -/// Maps "global" (per-CGU) file ID numbers to their underlying source files. +/// Maps "global" (per-CGU) file ID numbers to their underlying source file paths. +#[derive(Debug)] struct GlobalFileTable { /// This "raw" table doesn't include the working dir, so a file's /// global ID is its index in this set **plus one**. - raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>, + raw_file_table: FxIndexMap<StableSourceFileId, String>, + + /// The file table in encoded form (possibly compressed), which can be + /// included directly in this CGU's `__llvm_covmap` record. + filenames_buffer: Vec<u8>, + + /// Truncated hash of the bytes in `filenames_buffer`. + /// + /// The `llvm-cov` tool uses this hash to associate each covfun record with + /// its corresponding filenames table, since the final binary will typically + /// contain multiple covmap records from different compilation units. + filenames_hash: u64, } impl GlobalFileTable { - fn new() -> Self { - Self { raw_file_table: FxIndexMap::default() } - } + /// Builds a "global file table" for this CGU, mapping numeric IDs to + /// path strings. + fn build<'a>(tcx: TyCtxt<'_>, all_files: impl Iterator<Item = &'a SourceFile>) -> Self { + let mut raw_file_table = FxIndexMap::default(); + + for file in all_files { + raw_file_table.entry(file.stable_id).or_insert_with(|| { + file.name + .for_scope(tcx.sess, RemapPathScopeComponents::MACRO) + .to_string_lossy() + .into_owned() + }); + } - fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId { - // Ensure the given file has a table entry, and get its index. - let entry = self.raw_file_table.entry(file.stable_id); - let raw_id = entry.index(); - entry.or_insert_with(|| Arc::clone(file)); + // FIXME(Zalathar): Consider sorting the file table here, but maybe + // only after adding filename support to coverage-dump, so that the + // table order isn't directly visible in `.coverage-map` snapshots. - // The raw file table doesn't include an entry for the working dir - // (which has ID 0), so add 1 to get the correct ID. - GlobalFileId::from_usize(raw_id + 1) - } + let mut table = Vec::with_capacity(raw_file_table.len() + 1); - fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> { - let mut table = Vec::with_capacity(self.raw_file_table.len() + 1); - - // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5) - // requires setting the first filename to the compilation directory. - // Since rustc generates coverage maps with relative paths, the - // compilation directory can be combined with the relative paths - // to get absolute paths, if needed. - table.push( - tcx.sess - .opts - .working_dir - .for_scope(tcx.sess, RemapPathScopeComponents::MACRO) - .to_string_lossy(), - ); + // Since version 6 of the LLVM coverage mapping format, the first entry + // in the global file table is treated as a base directory, used to + // resolve any other entries that are stored as relative paths. + let base_dir = tcx + .sess + .opts + .working_dir + .for_scope(tcx.sess, RemapPathScopeComponents::MACRO) + .to_string_lossy(); + table.push(base_dir.as_ref()); // Add the regular entries after the base directory. - table.extend(self.raw_file_table.values().map(|file| { - file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy() - })); + table.extend(raw_file_table.values().map(|name| name.as_str())); - llvm_cov::write_filenames_to_buffer(&table) + // Encode the file table into a buffer, and get the hash of its encoded + // bytes, so that we can embed that hash in `__llvm_covfun` records. + let filenames_buffer = llvm_cov::write_filenames_to_buffer(&table); + let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer); + + Self { raw_file_table, filenames_buffer, filenames_hash } + } + + fn get_existing_id(&self, file: &SourceFile) -> Option<GlobalFileId> { + let raw_id = self.raw_file_table.get_index_of(&file.stable_id)?; + // The raw file table doesn't include an entry for the base dir + // (which has ID 0), so add 1 to get the correct ID. + Some(GlobalFileId::from_usize(raw_id + 1)) } } @@ -193,26 +175,31 @@ rustc_index::newtype_index! { struct LocalFileId {} } -/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU) -/// file IDs. +/// Holds a mapping from "local" (per-function) file IDs to their corresponding +/// source files. #[derive(Debug, Default)] struct VirtualFileMapping { - local_to_global: IndexVec<LocalFileId, GlobalFileId>, - global_to_local: FxIndexMap<GlobalFileId, LocalFileId>, + local_file_table: IndexVec<LocalFileId, Arc<SourceFile>>, } impl VirtualFileMapping { - fn local_id_for_global(&mut self, global_file_id: GlobalFileId) -> LocalFileId { - *self - .global_to_local - .entry(global_file_id) - .or_insert_with(|| self.local_to_global.push(global_file_id)) + fn push_file(&mut self, source_file: &Arc<SourceFile>) -> LocalFileId { + self.local_file_table.push(Arc::clone(source_file)) } - fn to_vec(&self) -> Vec<u32> { - // This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`, - // but it isn't hot or expensive enough to justify the extra unsafety. - self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect() + /// Resolves all of the filenames in this local file mapping to a list of + /// global file IDs in its CGU, for inclusion in this function's + /// `__llvm_covfun` record. + /// + /// The global file IDs are returned as `u32` to make FFI easier. + fn resolve_all(&self, global_file_table: &GlobalFileTable) -> Option<Vec<u32>> { + self.local_file_table + .iter() + .map(|file| try { + let id = global_file_table.get_existing_id(file)?; + GlobalFileId::as_u32(id) + }) + .collect::<Option<Vec<_>>>() } } @@ -249,121 +236,3 @@ fn generate_covmap_record<'ll>(cx: &CodegenCx<'ll, '_>, version: u32, filenames_ cx.add_used_global(covmap_global); } - -/// Each CGU will normally only emit coverage metadata for the functions that it actually generates. -/// But since we don't want unused functions to disappear from coverage reports, we also scan for -/// functions that were instrumented but are not participating in codegen. -/// -/// These unused functions don't need to be codegenned, but we do need to add them to the function -/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them. -/// We also end up adding their symbol names to a special global array that LLVM will include in -/// its embedded coverage data. -fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<ty::Instance<'tcx>> { - assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); - - let tcx = cx.tcx; - let usage = prepare_usage_sets(tcx); - - let is_unused_fn = |def_id: LocalDefId| -> bool { - // Usage sets expect `DefId`, so convert from `LocalDefId`. - let d: DefId = LocalDefId::to_def_id(def_id); - // To be potentially eligible for "unused function" mappings, a definition must: - // - Be eligible for coverage instrumentation - // - Not participate directly in codegen (or have lost all its coverage statements) - // - Not have any coverage statements inlined into codegenned functions - tcx.is_eligible_for_coverage(def_id) - && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d)) - && !usage.used_via_inlining.contains(&d) - }; - - // FIXME(#79651): Consider trying to filter out dummy instantiations of - // unused generic functions from library crates, because they can produce - // "unused instantiation" in coverage reports even when they are actually - // used by some downstream crate in the same binary. - - tcx.mir_keys(()) - .iter() - .copied() - .filter(|&def_id| is_unused_fn(def_id)) - .map(|def_id| make_dummy_instance(tcx, def_id)) - .collect::<Vec<_>>() -} - -struct UsageSets<'tcx> { - all_mono_items: &'tcx DefIdSet, - used_via_inlining: FxHashSet<DefId>, - missing_own_coverage: FxHashSet<DefId>, -} - -/// Prepare sets of definitions that are relevant to deciding whether something -/// is an "unused function" for coverage purposes. -fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> { - let MonoItemPartitions { all_mono_items, codegen_units, .. } = - tcx.collect_and_partition_mono_items(()); - - // Obtain a MIR body for each function participating in codegen, via an - // arbitrary instance. - let mut def_ids_seen = FxHashSet::default(); - let def_and_mir_for_all_mono_fns = codegen_units - .iter() - .flat_map(|cgu| cgu.items().keys()) - .filter_map(|item| match item { - mir::mono::MonoItem::Fn(instance) => Some(instance), - mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None, - }) - // We only need one arbitrary instance per definition. - .filter(move |instance| def_ids_seen.insert(instance.def_id())) - .map(|instance| { - // We don't care about the instance, just its underlying MIR. - let body = tcx.instance_mir(instance.def); - (instance.def_id(), body) - }); - - // Functions whose coverage statements were found inlined into other functions. - let mut used_via_inlining = FxHashSet::default(); - // Functions that were instrumented, but had all of their coverage statements - // removed by later MIR transforms (e.g. UnreachablePropagation). - let mut missing_own_coverage = FxHashSet::default(); - - for (def_id, body) in def_and_mir_for_all_mono_fns { - let mut saw_own_coverage = false; - - // Inspect every coverage statement in the function's MIR. - for stmt in body - .basic_blocks - .iter() - .flat_map(|block| &block.statements) - .filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_))) - { - if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) { - // This coverage statement was inlined from another function. - used_via_inlining.insert(inlined.def_id()); - } else { - // Non-inlined coverage statements belong to the enclosing function. - saw_own_coverage = true; - } - } - - if !saw_own_coverage && body.function_coverage_info.is_some() { - missing_own_coverage.insert(def_id); - } - } - - UsageSets { all_mono_items, used_via_inlining, missing_own_coverage } -} - -fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> { - let def_id = local_def_id.to_def_id(); - - // Make a dummy instance that fills in all generics with placeholders. - ty::Instance::new( - def_id, - ty::GenericArgs::for_item(tcx, def_id, |param, _| { - if let ty::GenericParamDefKind::Lifetime = param.kind { - tcx.lifetimes.re_erased.into() - } else { - tcx.mk_param_from_def(param) - } - }), - ) -} |
