diff options
Diffstat (limited to 'compiler/rustc_codegen_llvm/src/coverageinfo/mapgen')
3 files changed, 254 insertions, 76 deletions
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs index 80e54bf045e..7bdbc685952 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs @@ -5,6 +5,7 @@ //! [^win]: On Windows the section name is `.lcovfun`. use std::ffi::CString; +use std::sync::Arc; use rustc_abi::Align; use rustc_codegen_ssa::traits::{ @@ -15,7 +16,7 @@ use rustc_middle::mir::coverage::{ MappingKind, Op, }; use rustc_middle::ty::{Instance, TyCtxt}; -use rustc_span::Span; +use rustc_span::{SourceFile, Span}; use rustc_target::spec::HasTargetSpec; use tracing::debug; @@ -38,16 +39,15 @@ pub(crate) struct CovfunRecord<'tcx> { } impl<'tcx> CovfunRecord<'tcx> { - /// FIXME(Zalathar): Make this the responsibility of the code that determines - /// which functions are unused. - pub(crate) fn mangled_function_name_if_unused(&self) -> Option<&'tcx str> { - (!self.is_used).then_some(self.mangled_function_name) + /// Iterator that yields all source files referred to by this function's + /// coverage mappings. Used to build the global file table for the CGU. + pub(crate) fn all_source_files(&self) -> impl Iterator<Item = &SourceFile> { + self.virtual_file_mapping.local_file_table.iter().map(Arc::as_ref) } } pub(crate) fn prepare_covfun_record<'tcx>( tcx: TyCtxt<'tcx>, - global_file_table: &mut GlobalFileTable, instance: Instance<'tcx>, is_used: bool, ) -> Option<CovfunRecord<'tcx>> { @@ -65,7 +65,7 @@ pub(crate) fn prepare_covfun_record<'tcx>( regions: ffi::Regions::default(), }; - fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun); + fill_region_tables(tcx, fn_cov_info, ids_info, &mut covfun); if covfun.regions.has_no_regions() { debug!(?covfun, "function has no mappings to embed; skipping"); @@ -100,28 +100,37 @@ fn prepare_expressions(ids_info: &CoverageIdsInfo) -> Vec<ffi::CounterExpression /// Populates the mapping region tables in the current function's covfun record. fn fill_region_tables<'tcx>( tcx: TyCtxt<'tcx>, - global_file_table: &mut GlobalFileTable, fn_cov_info: &'tcx FunctionCoverageInfo, ids_info: &'tcx CoverageIdsInfo, covfun: &mut CovfunRecord<'tcx>, ) { - // Currently a function's mappings must all be in the same file as its body span. + // Currently a function's mappings must all be in the same file, so use the + // first mapping's span to determine the file. let source_map = tcx.sess.source_map(); - let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo()); - - // Look up the global file ID for that file. - let global_file_id = global_file_table.global_file_id_for_file(&source_file); - - // Associate that global file ID with a local file ID for this function. - let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id); + let Some(first_span) = (try { fn_cov_info.mappings.first()?.span }) else { + debug_assert!(false, "function has no mappings: {:?}", covfun.mangled_function_name); + return; + }; + let source_file = source_map.lookup_source_file(first_span.lo()); - let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } = - &mut covfun.regions; + let local_file_id = covfun.virtual_file_mapping.push_file(&source_file); - let make_cov_span = |span: Span| { - spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span) - }; + // In rare cases, _all_ of a function's spans are discarded, and coverage + // codegen needs to handle that gracefully to avoid #133606. + // It's hard for tests to trigger this organically, so instead we set + // `-Zcoverage-options=discard-all-spans-in-codegen` to force it to occur. let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen(); + let make_coords = |span: Span| { + if discard_all { None } else { spans::make_coords(source_map, &source_file, span) } + }; + + let ffi::Regions { + code_regions, + expansion_regions: _, // FIXME(Zalathar): Fill out support for expansion regions + branch_regions, + mcdc_branch_regions, + mcdc_decision_regions, + } = &mut covfun.regions; // For each counter/region pair in this function+file, convert it to a // form suitable for FFI. @@ -136,17 +145,8 @@ fn fill_region_tables<'tcx>( ffi::Counter::from_term(term) }; - // Convert the `Span` into coordinates that we can pass to LLVM, or - // discard the span if conversion fails. In rare, cases _all_ of a - // function's spans are discarded, and the rest of coverage codegen - // needs to handle that gracefully to avoid a repeat of #133606. - // We don't have a good test case for triggering that organically, so - // instead we set `-Zcoverage-options=discard-all-spans-in-codegen` - // to force it to occur. - let Some(cov_span) = make_cov_span(span) else { continue }; - if discard_all { - continue; - } + let Some(coords) = make_coords(span) else { continue }; + let cov_span = coords.make_coverage_span(local_file_id); match *kind { MappingKind::Code { bcb } => { @@ -182,7 +182,7 @@ fn fill_region_tables<'tcx>( /// as a global variable in the `__llvm_covfun` section. pub(crate) fn generate_covfun_record<'tcx>( cx: &CodegenCx<'_, 'tcx>, - filenames_hash: u64, + global_file_table: &GlobalFileTable, covfun: &CovfunRecord<'tcx>, ) { let &CovfunRecord { @@ -194,12 +194,19 @@ pub(crate) fn generate_covfun_record<'tcx>( ref regions, } = covfun; + let Some(local_file_table) = virtual_file_mapping.resolve_all(global_file_table) else { + debug_assert!( + false, + "all local files should be present in the global file table: \ + global_file_table = {global_file_table:?}, \ + virtual_file_mapping = {virtual_file_mapping:?}" + ); + return; + }; + // Encode the function's coverage mappings into a buffer. - let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer( - &virtual_file_mapping.to_vec(), - expressions, - regions, - ); + let coverage_mapping_buffer = + llvm_cov::write_function_mappings_to_buffer(&local_file_table, expressions, regions); // A covfun record consists of four target-endian integers, followed by the // encoded mapping data in bytes. Note that the length field is 32 bits. @@ -212,7 +219,7 @@ pub(crate) fn generate_covfun_record<'tcx>( cx.const_u64(func_name_hash), cx.const_u32(coverage_mapping_buffer.len() as u32), cx.const_u64(source_hash), - cx.const_u64(filenames_hash), + cx.const_u64(global_file_table.filenames_hash), cx.const_bytes(&coverage_mapping_buffer), ], // This struct needs to be packed, so that the 32-bit length field diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs index 6d1d91340c2..39a59560c9d 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs @@ -1,4 +1,3 @@ -use rustc_middle::mir::coverage::FunctionCoverageInfo; use rustc_span::source_map::SourceMap; use rustc_span::{BytePos, Pos, SourceFile, Span}; use tracing::debug; @@ -6,24 +5,41 @@ use tracing::debug; use crate::coverageinfo::ffi; use crate::coverageinfo::mapgen::LocalFileId; +/// Line and byte-column coordinates of a source code span within some file. +/// The file itself must be tracked separately. +#[derive(Clone, Copy, Debug)] +pub(crate) struct Coords { + /// 1-based starting line of the source code span. + pub(crate) start_line: u32, + /// 1-based starting column (in bytes) of the source code span. + pub(crate) start_col: u32, + /// 1-based ending line of the source code span. + pub(crate) end_line: u32, + /// 1-based ending column (in bytes) of the source code span. High bit must be unset. + pub(crate) end_col: u32, +} + +impl Coords { + /// Attaches a local file ID to these coordinates to produce an `ffi::CoverageSpan`. + pub(crate) fn make_coverage_span(&self, local_file_id: LocalFileId) -> ffi::CoverageSpan { + let &Self { start_line, start_col, end_line, end_col } = self; + let file_id = local_file_id.as_u32(); + ffi::CoverageSpan { file_id, start_line, start_col, end_line, end_col } + } +} + /// Converts the span into its start line and column, and end line and column. /// /// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by /// the compiler, these column numbers are denoted in **bytes**, because that's what /// LLVM's `llvm-cov` tool expects to see in coverage maps. /// -/// Returns `None` if the conversion failed for some reason. This shouldn't happen, +/// Returns `None` if the conversion failed for some reason. This should be uncommon, /// but it's hard to rule out entirely (especially in the presence of complex macros /// or other expansions), and if it does happen then skipping a span or function is /// better than an ICE or `llvm-cov` failure that the user might have no way to avoid. -pub(crate) fn make_coverage_span( - file_id: LocalFileId, - source_map: &SourceMap, - fn_cov_info: &FunctionCoverageInfo, - file: &SourceFile, - span: Span, -) -> Option<ffi::CoverageSpan> { - let span = ensure_non_empty_span(source_map, fn_cov_info, span)?; +pub(crate) fn make_coords(source_map: &SourceMap, file: &SourceFile, span: Span) -> Option<Coords> { + let span = ensure_non_empty_span(source_map, span)?; let lo = span.lo(); let hi = span.hi(); @@ -46,8 +62,7 @@ pub(crate) fn make_coverage_span( start_line = source_map.doctest_offset_line(&file.name, start_line); end_line = source_map.doctest_offset_line(&file.name, end_line); - check_coverage_span(ffi::CoverageSpan { - file_id: file_id.as_u32(), + check_coords(Coords { start_line: start_line as u32, start_col: start_col as u32, end_line: end_line as u32, @@ -55,36 +70,22 @@ pub(crate) fn make_coverage_span( }) } -fn ensure_non_empty_span( - source_map: &SourceMap, - fn_cov_info: &FunctionCoverageInfo, - span: Span, -) -> Option<Span> { +fn ensure_non_empty_span(source_map: &SourceMap, span: Span) -> Option<Span> { if !span.is_empty() { return Some(span); } - let lo = span.lo(); - let hi = span.hi(); - - // The span is empty, so try to expand it to cover an adjacent '{' or '}', - // but only within the bounds of the body span. - let try_next = hi < fn_cov_info.body_span.hi(); - let try_prev = fn_cov_info.body_span.lo() < lo; - if !(try_next || try_prev) { - return None; - } - + // The span is empty, so try to enlarge it to cover an adjacent '{' or '}'. source_map .span_to_source(span, |src, start, end| try { // Adjusting span endpoints by `BytePos(1)` is normally a bug, // but in this case we have specifically checked that the character // we're skipping over is one of two specific ASCII characters, so // adjusting by exactly 1 byte is correct. - if try_next && src.as_bytes()[end] == b'{' { - Some(span.with_hi(hi + BytePos(1))) - } else if try_prev && src.as_bytes()[start - 1] == b'}' { - Some(span.with_lo(lo - BytePos(1))) + if src.as_bytes().get(end).copied() == Some(b'{') { + Some(span.with_hi(span.hi() + BytePos(1))) + } else if start > 0 && src.as_bytes()[start - 1] == b'}' { + Some(span.with_lo(span.lo() - BytePos(1))) } else { None } @@ -96,8 +97,8 @@ fn ensure_non_empty_span( /// it will immediately exit with a fatal error. To prevent that from happening, /// discard regions that are improperly ordered, or might be interpreted in a /// way that makes them improperly ordered. -fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> { - let ffi::CoverageSpan { file_id: _, start_line, start_col, end_line, end_col } = cov_span; +fn check_coords(coords: Coords) -> Option<Coords> { + let Coords { start_line, start_col, end_line, end_col } = coords; // Line/column coordinates are supposed to be 1-based. If we ever emit // coordinates of 0, `llvm-cov` might misinterpret them. @@ -110,17 +111,17 @@ fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> let is_ordered = (start_line, start_col) <= (end_line, end_col); if all_nonzero && end_col_has_high_bit_unset && is_ordered { - Some(cov_span) + Some(coords) } else { debug!( - ?cov_span, + ?coords, ?all_nonzero, ?end_col_has_high_bit_unset, ?is_ordered, "Skipping source region that would be misinterpreted or rejected by LLVM" ); // If this happens in a debug build, ICE to make it easier to notice. - debug_assert!(false, "Improper source region: {cov_span:?}"); + debug_assert!(false, "Improper source region: {coords:?}"); None } } diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs new file mode 100644 index 00000000000..68f60f169b5 --- /dev/null +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs @@ -0,0 +1,170 @@ +use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, ConstCodegenMethods}; +use rustc_data_structures::fx::FxHashSet; +use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_middle::mir; +use rustc_middle::mir::mono::MonoItemPartitions; +use rustc_middle::ty::{self, TyCtxt}; +use rustc_span::def_id::DefIdSet; + +use crate::common::CodegenCx; +use crate::coverageinfo::mapgen::covfun::{CovfunRecord, prepare_covfun_record}; +use crate::llvm; + +/// Each CGU will normally only emit coverage metadata for the functions that it actually generates. +/// But since we don't want unused functions to disappear from coverage reports, we also scan for +/// functions that were instrumented but are not participating in codegen. +/// +/// These unused functions don't need to be codegenned, but we do need to add them to the function +/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them. +/// We also end up adding their symbol names to a special global array that LLVM will include in +/// its embedded coverage data. +pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>( + cx: &CodegenCx<'_, 'tcx>, + covfun_records: &mut Vec<CovfunRecord<'tcx>>, +) { + assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); + + let mut unused_instances = gather_unused_function_instances(cx); + // Sort the unused instances by symbol name, so that their order isn't hash-sensitive. + unused_instances.sort_by_key(|instance| instance.symbol_name); + + // Try to create a covfun record for each unused function. + let mut name_globals = Vec::with_capacity(unused_instances.len()); + covfun_records.extend(unused_instances.into_iter().filter_map(|unused| try { + let record = prepare_covfun_record(cx.tcx, unused.instance, false)?; + // If successful, also store its symbol name in a global constant. + name_globals.push(cx.const_str(unused.symbol_name.name).0); + record + })); + + // Store the names of unused functions in a specially-named global array. + // LLVM's `InstrProfilling` pass will detect this array, and include the + // referenced names in its `__llvm_prf_names` section. + // (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.) + if !name_globals.is_empty() { + let initializer = cx.const_array(cx.type_ptr(), &name_globals); + + let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), c"__llvm_coverage_names"); + llvm::set_global_constant(array, true); + llvm::set_linkage(array, llvm::Linkage::InternalLinkage); + llvm::set_initializer(array, initializer); + } +} + +/// Holds a dummy function instance along with its symbol name, to avoid having +/// to repeatedly query for the name. +struct UnusedInstance<'tcx> { + instance: ty::Instance<'tcx>, + symbol_name: ty::SymbolName<'tcx>, +} + +fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<UnusedInstance<'tcx>> { + assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); + + let tcx = cx.tcx; + let usage = prepare_usage_sets(tcx); + + let is_unused_fn = |def_id: LocalDefId| -> bool { + // Usage sets expect `DefId`, so convert from `LocalDefId`. + let d: DefId = LocalDefId::to_def_id(def_id); + // To be potentially eligible for "unused function" mappings, a definition must: + // - Be eligible for coverage instrumentation + // - Not participate directly in codegen (or have lost all its coverage statements) + // - Not have any coverage statements inlined into codegenned functions + tcx.is_eligible_for_coverage(def_id) + && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d)) + && !usage.used_via_inlining.contains(&d) + }; + + // FIXME(#79651): Consider trying to filter out dummy instantiations of + // unused generic functions from library crates, because they can produce + // "unused instantiation" in coverage reports even when they are actually + // used by some downstream crate in the same binary. + + tcx.mir_keys(()) + .iter() + .copied() + .filter(|&def_id| is_unused_fn(def_id)) + .map(|def_id| make_dummy_instance(tcx, def_id)) + .map(|instance| UnusedInstance { instance, symbol_name: tcx.symbol_name(instance) }) + .collect::<Vec<_>>() +} + +struct UsageSets<'tcx> { + all_mono_items: &'tcx DefIdSet, + used_via_inlining: FxHashSet<DefId>, + missing_own_coverage: FxHashSet<DefId>, +} + +/// Prepare sets of definitions that are relevant to deciding whether something +/// is an "unused function" for coverage purposes. +fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> { + let MonoItemPartitions { all_mono_items, codegen_units, .. } = + tcx.collect_and_partition_mono_items(()); + + // Obtain a MIR body for each function participating in codegen, via an + // arbitrary instance. + let mut def_ids_seen = FxHashSet::default(); + let def_and_mir_for_all_mono_fns = codegen_units + .iter() + .flat_map(|cgu| cgu.items().keys()) + .filter_map(|item| match item { + mir::mono::MonoItem::Fn(instance) => Some(instance), + mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None, + }) + // We only need one arbitrary instance per definition. + .filter(move |instance| def_ids_seen.insert(instance.def_id())) + .map(|instance| { + // We don't care about the instance, just its underlying MIR. + let body = tcx.instance_mir(instance.def); + (instance.def_id(), body) + }); + + // Functions whose coverage statements were found inlined into other functions. + let mut used_via_inlining = FxHashSet::default(); + // Functions that were instrumented, but had all of their coverage statements + // removed by later MIR transforms (e.g. UnreachablePropagation). + let mut missing_own_coverage = FxHashSet::default(); + + for (def_id, body) in def_and_mir_for_all_mono_fns { + let mut saw_own_coverage = false; + + // Inspect every coverage statement in the function's MIR. + for stmt in body + .basic_blocks + .iter() + .flat_map(|block| &block.statements) + .filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_))) + { + if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) { + // This coverage statement was inlined from another function. + used_via_inlining.insert(inlined.def_id()); + } else { + // Non-inlined coverage statements belong to the enclosing function. + saw_own_coverage = true; + } + } + + if !saw_own_coverage && body.function_coverage_info.is_some() { + missing_own_coverage.insert(def_id); + } + } + + UsageSets { all_mono_items, used_via_inlining, missing_own_coverage } +} + +fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> { + let def_id = local_def_id.to_def_id(); + + // Make a dummy instance that fills in all generics with placeholders. + ty::Instance::new( + def_id, + ty::GenericArgs::for_item(tcx, def_id, |param, _| { + if let ty::GenericParamDefKind::Lifetime = param.kind { + tcx.lifetimes.re_erased.into() + } else { + tcx.mk_param_from_def(param) + } + }), + ) +} |
