about summary refs log tree commit diff
path: root/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_codegen_llvm/src/coverageinfo/mapgen')
-rw-r--r--compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs85
-rw-r--r--compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs75
-rw-r--r--compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs170
3 files changed, 254 insertions, 76 deletions
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
index 80e54bf045e..7bdbc685952 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -5,6 +5,7 @@
 //! [^win]: On Windows the section name is `.lcovfun`.
 
 use std::ffi::CString;
+use std::sync::Arc;
 
 use rustc_abi::Align;
 use rustc_codegen_ssa::traits::{
@@ -15,7 +16,7 @@ use rustc_middle::mir::coverage::{
     MappingKind, Op,
 };
 use rustc_middle::ty::{Instance, TyCtxt};
-use rustc_span::Span;
+use rustc_span::{SourceFile, Span};
 use rustc_target::spec::HasTargetSpec;
 use tracing::debug;
 
@@ -38,16 +39,15 @@ pub(crate) struct CovfunRecord<'tcx> {
 }
 
 impl<'tcx> CovfunRecord<'tcx> {
-    /// FIXME(Zalathar): Make this the responsibility of the code that determines
-    /// which functions are unused.
-    pub(crate) fn mangled_function_name_if_unused(&self) -> Option<&'tcx str> {
-        (!self.is_used).then_some(self.mangled_function_name)
+    /// Iterator that yields all source files referred to by this function's
+    /// coverage mappings. Used to build the global file table for the CGU.
+    pub(crate) fn all_source_files(&self) -> impl Iterator<Item = &SourceFile> {
+        self.virtual_file_mapping.local_file_table.iter().map(Arc::as_ref)
     }
 }
 
 pub(crate) fn prepare_covfun_record<'tcx>(
     tcx: TyCtxt<'tcx>,
-    global_file_table: &mut GlobalFileTable,
     instance: Instance<'tcx>,
     is_used: bool,
 ) -> Option<CovfunRecord<'tcx>> {
@@ -65,7 +65,7 @@ pub(crate) fn prepare_covfun_record<'tcx>(
         regions: ffi::Regions::default(),
     };
 
-    fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun);
+    fill_region_tables(tcx, fn_cov_info, ids_info, &mut covfun);
 
     if covfun.regions.has_no_regions() {
         debug!(?covfun, "function has no mappings to embed; skipping");
@@ -100,28 +100,37 @@ fn prepare_expressions(ids_info: &CoverageIdsInfo) -> Vec<ffi::CounterExpression
 /// Populates the mapping region tables in the current function's covfun record.
 fn fill_region_tables<'tcx>(
     tcx: TyCtxt<'tcx>,
-    global_file_table: &mut GlobalFileTable,
     fn_cov_info: &'tcx FunctionCoverageInfo,
     ids_info: &'tcx CoverageIdsInfo,
     covfun: &mut CovfunRecord<'tcx>,
 ) {
-    // Currently a function's mappings must all be in the same file as its body span.
+    // Currently a function's mappings must all be in the same file, so use the
+    // first mapping's span to determine the file.
     let source_map = tcx.sess.source_map();
-    let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo());
-
-    // Look up the global file ID for that file.
-    let global_file_id = global_file_table.global_file_id_for_file(&source_file);
-
-    // Associate that global file ID with a local file ID for this function.
-    let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
+    let Some(first_span) = (try { fn_cov_info.mappings.first()?.span }) else {
+        debug_assert!(false, "function has no mappings: {:?}", covfun.mangled_function_name);
+        return;
+    };
+    let source_file = source_map.lookup_source_file(first_span.lo());
 
-    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
-        &mut covfun.regions;
+    let local_file_id = covfun.virtual_file_mapping.push_file(&source_file);
 
-    let make_cov_span = |span: Span| {
-        spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span)
-    };
+    // In rare cases, _all_ of a function's spans are discarded, and coverage
+    // codegen needs to handle that gracefully to avoid #133606.
+    // It's hard for tests to trigger this organically, so instead we set
+    // `-Zcoverage-options=discard-all-spans-in-codegen` to force it to occur.
     let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen();
+    let make_coords = |span: Span| {
+        if discard_all { None } else { spans::make_coords(source_map, &source_file, span) }
+    };
+
+    let ffi::Regions {
+        code_regions,
+        expansion_regions: _, // FIXME(Zalathar): Fill out support for expansion regions
+        branch_regions,
+        mcdc_branch_regions,
+        mcdc_decision_regions,
+    } = &mut covfun.regions;
 
     // For each counter/region pair in this function+file, convert it to a
     // form suitable for FFI.
@@ -136,17 +145,8 @@ fn fill_region_tables<'tcx>(
             ffi::Counter::from_term(term)
         };
 
-        // Convert the `Span` into coordinates that we can pass to LLVM, or
-        // discard the span if conversion fails. In rare, cases _all_ of a
-        // function's spans are discarded, and the rest of coverage codegen
-        // needs to handle that gracefully to avoid a repeat of #133606.
-        // We don't have a good test case for triggering that organically, so
-        // instead we set `-Zcoverage-options=discard-all-spans-in-codegen`
-        // to force it to occur.
-        let Some(cov_span) = make_cov_span(span) else { continue };
-        if discard_all {
-            continue;
-        }
+        let Some(coords) = make_coords(span) else { continue };
+        let cov_span = coords.make_coverage_span(local_file_id);
 
         match *kind {
             MappingKind::Code { bcb } => {
@@ -182,7 +182,7 @@ fn fill_region_tables<'tcx>(
 /// as a global variable in the `__llvm_covfun` section.
 pub(crate) fn generate_covfun_record<'tcx>(
     cx: &CodegenCx<'_, 'tcx>,
-    filenames_hash: u64,
+    global_file_table: &GlobalFileTable,
     covfun: &CovfunRecord<'tcx>,
 ) {
     let &CovfunRecord {
@@ -194,12 +194,19 @@ pub(crate) fn generate_covfun_record<'tcx>(
         ref regions,
     } = covfun;
 
+    let Some(local_file_table) = virtual_file_mapping.resolve_all(global_file_table) else {
+        debug_assert!(
+            false,
+            "all local files should be present in the global file table: \
+                global_file_table = {global_file_table:?}, \
+                virtual_file_mapping = {virtual_file_mapping:?}"
+        );
+        return;
+    };
+
     // Encode the function's coverage mappings into a buffer.
-    let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer(
-        &virtual_file_mapping.to_vec(),
-        expressions,
-        regions,
-    );
+    let coverage_mapping_buffer =
+        llvm_cov::write_function_mappings_to_buffer(&local_file_table, expressions, regions);
 
     // A covfun record consists of four target-endian integers, followed by the
     // encoded mapping data in bytes. Note that the length field is 32 bits.
@@ -212,7 +219,7 @@ pub(crate) fn generate_covfun_record<'tcx>(
             cx.const_u64(func_name_hash),
             cx.const_u32(coverage_mapping_buffer.len() as u32),
             cx.const_u64(source_hash),
-            cx.const_u64(filenames_hash),
+            cx.const_u64(global_file_table.filenames_hash),
             cx.const_bytes(&coverage_mapping_buffer),
         ],
         // This struct needs to be packed, so that the 32-bit length field
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
index 6d1d91340c2..39a59560c9d 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
@@ -1,4 +1,3 @@
-use rustc_middle::mir::coverage::FunctionCoverageInfo;
 use rustc_span::source_map::SourceMap;
 use rustc_span::{BytePos, Pos, SourceFile, Span};
 use tracing::debug;
@@ -6,24 +5,41 @@ use tracing::debug;
 use crate::coverageinfo::ffi;
 use crate::coverageinfo::mapgen::LocalFileId;
 
+/// Line and byte-column coordinates of a source code span within some file.
+/// The file itself must be tracked separately.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Coords {
+    /// 1-based starting line of the source code span.
+    pub(crate) start_line: u32,
+    /// 1-based starting column (in bytes) of the source code span.
+    pub(crate) start_col: u32,
+    /// 1-based ending line of the source code span.
+    pub(crate) end_line: u32,
+    /// 1-based ending column (in bytes) of the source code span. High bit must be unset.
+    pub(crate) end_col: u32,
+}
+
+impl Coords {
+    /// Attaches a local file ID to these coordinates to produce an `ffi::CoverageSpan`.
+    pub(crate) fn make_coverage_span(&self, local_file_id: LocalFileId) -> ffi::CoverageSpan {
+        let &Self { start_line, start_col, end_line, end_col } = self;
+        let file_id = local_file_id.as_u32();
+        ffi::CoverageSpan { file_id, start_line, start_col, end_line, end_col }
+    }
+}
+
 /// Converts the span into its start line and column, and end line and column.
 ///
 /// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
 /// the compiler, these column numbers are denoted in **bytes**, because that's what
 /// LLVM's `llvm-cov` tool expects to see in coverage maps.
 ///
-/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
+/// Returns `None` if the conversion failed for some reason. This should be uncommon,
 /// but it's hard to rule out entirely (especially in the presence of complex macros
 /// or other expansions), and if it does happen then skipping a span or function is
 /// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
-pub(crate) fn make_coverage_span(
-    file_id: LocalFileId,
-    source_map: &SourceMap,
-    fn_cov_info: &FunctionCoverageInfo,
-    file: &SourceFile,
-    span: Span,
-) -> Option<ffi::CoverageSpan> {
-    let span = ensure_non_empty_span(source_map, fn_cov_info, span)?;
+pub(crate) fn make_coords(source_map: &SourceMap, file: &SourceFile, span: Span) -> Option<Coords> {
+    let span = ensure_non_empty_span(source_map, span)?;
 
     let lo = span.lo();
     let hi = span.hi();
@@ -46,8 +62,7 @@ pub(crate) fn make_coverage_span(
     start_line = source_map.doctest_offset_line(&file.name, start_line);
     end_line = source_map.doctest_offset_line(&file.name, end_line);
 
-    check_coverage_span(ffi::CoverageSpan {
-        file_id: file_id.as_u32(),
+    check_coords(Coords {
         start_line: start_line as u32,
         start_col: start_col as u32,
         end_line: end_line as u32,
@@ -55,36 +70,22 @@ pub(crate) fn make_coverage_span(
     })
 }
 
-fn ensure_non_empty_span(
-    source_map: &SourceMap,
-    fn_cov_info: &FunctionCoverageInfo,
-    span: Span,
-) -> Option<Span> {
+fn ensure_non_empty_span(source_map: &SourceMap, span: Span) -> Option<Span> {
     if !span.is_empty() {
         return Some(span);
     }
 
-    let lo = span.lo();
-    let hi = span.hi();
-
-    // The span is empty, so try to expand it to cover an adjacent '{' or '}',
-    // but only within the bounds of the body span.
-    let try_next = hi < fn_cov_info.body_span.hi();
-    let try_prev = fn_cov_info.body_span.lo() < lo;
-    if !(try_next || try_prev) {
-        return None;
-    }
-
+    // The span is empty, so try to enlarge it to cover an adjacent '{' or '}'.
     source_map
         .span_to_source(span, |src, start, end| try {
             // Adjusting span endpoints by `BytePos(1)` is normally a bug,
             // but in this case we have specifically checked that the character
             // we're skipping over is one of two specific ASCII characters, so
             // adjusting by exactly 1 byte is correct.
-            if try_next && src.as_bytes()[end] == b'{' {
-                Some(span.with_hi(hi + BytePos(1)))
-            } else if try_prev && src.as_bytes()[start - 1] == b'}' {
-                Some(span.with_lo(lo - BytePos(1)))
+            if src.as_bytes().get(end).copied() == Some(b'{') {
+                Some(span.with_hi(span.hi() + BytePos(1)))
+            } else if start > 0 && src.as_bytes()[start - 1] == b'}' {
+                Some(span.with_lo(span.lo() - BytePos(1)))
             } else {
                 None
             }
@@ -96,8 +97,8 @@ fn ensure_non_empty_span(
 /// it will immediately exit with a fatal error. To prevent that from happening,
 /// discard regions that are improperly ordered, or might be interpreted in a
 /// way that makes them improperly ordered.
-fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> {
-    let ffi::CoverageSpan { file_id: _, start_line, start_col, end_line, end_col } = cov_span;
+fn check_coords(coords: Coords) -> Option<Coords> {
+    let Coords { start_line, start_col, end_line, end_col } = coords;
 
     // Line/column coordinates are supposed to be 1-based. If we ever emit
     // coordinates of 0, `llvm-cov` might misinterpret them.
@@ -110,17 +111,17 @@ fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan>
     let is_ordered = (start_line, start_col) <= (end_line, end_col);
 
     if all_nonzero && end_col_has_high_bit_unset && is_ordered {
-        Some(cov_span)
+        Some(coords)
     } else {
         debug!(
-            ?cov_span,
+            ?coords,
             ?all_nonzero,
             ?end_col_has_high_bit_unset,
             ?is_ordered,
             "Skipping source region that would be misinterpreted or rejected by LLVM"
         );
         // If this happens in a debug build, ICE to make it easier to notice.
-        debug_assert!(false, "Improper source region: {cov_span:?}");
+        debug_assert!(false, "Improper source region: {coords:?}");
         None
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
new file mode 100644
index 00000000000..68f60f169b5
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
@@ -0,0 +1,170 @@
+use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, ConstCodegenMethods};
+use rustc_data_structures::fx::FxHashSet;
+use rustc_hir::def_id::{DefId, LocalDefId};
+use rustc_middle::mir;
+use rustc_middle::mir::mono::MonoItemPartitions;
+use rustc_middle::ty::{self, TyCtxt};
+use rustc_span::def_id::DefIdSet;
+
+use crate::common::CodegenCx;
+use crate::coverageinfo::mapgen::covfun::{CovfunRecord, prepare_covfun_record};
+use crate::llvm;
+
+/// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
+/// But since we don't want unused functions to disappear from coverage reports, we also scan for
+/// functions that were instrumented but are not participating in codegen.
+///
+/// These unused functions don't need to be codegenned, but we do need to add them to the function
+/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
+/// We also end up adding their symbol names to a special global array that LLVM will include in
+/// its embedded coverage data.
+pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>(
+    cx: &CodegenCx<'_, 'tcx>,
+    covfun_records: &mut Vec<CovfunRecord<'tcx>>,
+) {
+    assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
+
+    let mut unused_instances = gather_unused_function_instances(cx);
+    // Sort the unused instances by symbol name, so that their order isn't hash-sensitive.
+    unused_instances.sort_by_key(|instance| instance.symbol_name);
+
+    // Try to create a covfun record for each unused function.
+    let mut name_globals = Vec::with_capacity(unused_instances.len());
+    covfun_records.extend(unused_instances.into_iter().filter_map(|unused| try {
+        let record = prepare_covfun_record(cx.tcx, unused.instance, false)?;
+        // If successful, also store its symbol name in a global constant.
+        name_globals.push(cx.const_str(unused.symbol_name.name).0);
+        record
+    }));
+
+    // Store the names of unused functions in a specially-named global array.
+    // LLVM's `InstrProfilling` pass will detect this array, and include the
+    // referenced names in its `__llvm_prf_names` section.
+    // (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.)
+    if !name_globals.is_empty() {
+        let initializer = cx.const_array(cx.type_ptr(), &name_globals);
+
+        let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), c"__llvm_coverage_names");
+        llvm::set_global_constant(array, true);
+        llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
+        llvm::set_initializer(array, initializer);
+    }
+}
+
+/// Holds a dummy function instance along with its symbol name, to avoid having
+/// to repeatedly query for the name.
+struct UnusedInstance<'tcx> {
+    instance: ty::Instance<'tcx>,
+    symbol_name: ty::SymbolName<'tcx>,
+}
+
+fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<UnusedInstance<'tcx>> {
+    assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
+
+    let tcx = cx.tcx;
+    let usage = prepare_usage_sets(tcx);
+
+    let is_unused_fn = |def_id: LocalDefId| -> bool {
+        // Usage sets expect `DefId`, so convert from `LocalDefId`.
+        let d: DefId = LocalDefId::to_def_id(def_id);
+        // To be potentially eligible for "unused function" mappings, a definition must:
+        // - Be eligible for coverage instrumentation
+        // - Not participate directly in codegen (or have lost all its coverage statements)
+        // - Not have any coverage statements inlined into codegenned functions
+        tcx.is_eligible_for_coverage(def_id)
+            && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d))
+            && !usage.used_via_inlining.contains(&d)
+    };
+
+    // FIXME(#79651): Consider trying to filter out dummy instantiations of
+    // unused generic functions from library crates, because they can produce
+    // "unused instantiation" in coverage reports even when they are actually
+    // used by some downstream crate in the same binary.
+
+    tcx.mir_keys(())
+        .iter()
+        .copied()
+        .filter(|&def_id| is_unused_fn(def_id))
+        .map(|def_id| make_dummy_instance(tcx, def_id))
+        .map(|instance| UnusedInstance { instance, symbol_name: tcx.symbol_name(instance) })
+        .collect::<Vec<_>>()
+}
+
+struct UsageSets<'tcx> {
+    all_mono_items: &'tcx DefIdSet,
+    used_via_inlining: FxHashSet<DefId>,
+    missing_own_coverage: FxHashSet<DefId>,
+}
+
+/// Prepare sets of definitions that are relevant to deciding whether something
+/// is an "unused function" for coverage purposes.
+fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
+    let MonoItemPartitions { all_mono_items, codegen_units, .. } =
+        tcx.collect_and_partition_mono_items(());
+
+    // Obtain a MIR body for each function participating in codegen, via an
+    // arbitrary instance.
+    let mut def_ids_seen = FxHashSet::default();
+    let def_and_mir_for_all_mono_fns = codegen_units
+        .iter()
+        .flat_map(|cgu| cgu.items().keys())
+        .filter_map(|item| match item {
+            mir::mono::MonoItem::Fn(instance) => Some(instance),
+            mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None,
+        })
+        // We only need one arbitrary instance per definition.
+        .filter(move |instance| def_ids_seen.insert(instance.def_id()))
+        .map(|instance| {
+            // We don't care about the instance, just its underlying MIR.
+            let body = tcx.instance_mir(instance.def);
+            (instance.def_id(), body)
+        });
+
+    // Functions whose coverage statements were found inlined into other functions.
+    let mut used_via_inlining = FxHashSet::default();
+    // Functions that were instrumented, but had all of their coverage statements
+    // removed by later MIR transforms (e.g. UnreachablePropagation).
+    let mut missing_own_coverage = FxHashSet::default();
+
+    for (def_id, body) in def_and_mir_for_all_mono_fns {
+        let mut saw_own_coverage = false;
+
+        // Inspect every coverage statement in the function's MIR.
+        for stmt in body
+            .basic_blocks
+            .iter()
+            .flat_map(|block| &block.statements)
+            .filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_)))
+        {
+            if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) {
+                // This coverage statement was inlined from another function.
+                used_via_inlining.insert(inlined.def_id());
+            } else {
+                // Non-inlined coverage statements belong to the enclosing function.
+                saw_own_coverage = true;
+            }
+        }
+
+        if !saw_own_coverage && body.function_coverage_info.is_some() {
+            missing_own_coverage.insert(def_id);
+        }
+    }
+
+    UsageSets { all_mono_items, used_via_inlining, missing_own_coverage }
+}
+
+fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> {
+    let def_id = local_def_id.to_def_id();
+
+    // Make a dummy instance that fills in all generics with placeholders.
+    ty::Instance::new(
+        def_id,
+        ty::GenericArgs::for_item(tcx, def_id, |param, _| {
+            if let ty::GenericParamDefKind::Lifetime = param.kind {
+                tcx.lifetimes.re_erased.into()
+            } else {
+                tcx.mk_param_from_def(param)
+            }
+        }),
+    )
+}