diff options
Diffstat (limited to 'compiler/rustc_codegen_llvm/src/coverageinfo')
7 files changed, 560 insertions, 598 deletions
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs index a6e07ea2a60..b617f4d37f5 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs @@ -1,6 +1,4 @@ -use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId, SourceRegion}; - -use crate::coverageinfo::mapgen::LocalFileId; +use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId}; /// Must match the layout of `LLVMRustCounterKind`. #[derive(Copy, Clone, Debug)] @@ -126,29 +124,43 @@ pub(crate) struct CoverageSpan { /// Local index into the function's local-to-global file ID table. /// The value at that index is itself an index into the coverage filename /// table in the CGU's `__llvm_covmap` section. - file_id: u32, + pub(crate) file_id: u32, /// 1-based starting line of the source code span. - start_line: u32, + pub(crate) start_line: u32, /// 1-based starting column of the source code span. - start_col: u32, + pub(crate) start_col: u32, /// 1-based ending line of the source code span. - end_line: u32, + pub(crate) end_line: u32, /// 1-based ending column of the source code span. High bit must be unset. - end_col: u32, + pub(crate) end_col: u32, +} + +/// Holds tables of the various region types in one struct. +/// +/// Don't pass this struct across FFI; pass the individual region tables as +/// pointer/length pairs instead. +/// +/// Each field name has a `_regions` suffix for improved readability after +/// exhaustive destructing, which ensures that all region types are handled. 
+#[derive(Clone, Debug, Default)] +pub(crate) struct Regions { + pub(crate) code_regions: Vec<CodeRegion>, + pub(crate) branch_regions: Vec<BranchRegion>, + pub(crate) mcdc_branch_regions: Vec<MCDCBranchRegion>, + pub(crate) mcdc_decision_regions: Vec<MCDCDecisionRegion>, } -impl CoverageSpan { - pub(crate) fn from_source_region( - local_file_id: LocalFileId, - code_region: &SourceRegion, - ) -> Self { - let file_id = local_file_id.as_u32(); - let &SourceRegion { start_line, start_col, end_line, end_col } = code_region; - // Internally, LLVM uses the high bit of `end_col` to distinguish between - // code regions and gap regions, so it can't be used by the column number. - assert!(end_col & (1u32 << 31) == 0, "high bit of `end_col` must be unset: {end_col:#X}"); - Self { file_id, start_line, start_col, end_line, end_col } +impl Regions { + /// Returns true if none of this structure's tables contain any regions. + pub(crate) fn has_no_regions(&self) -> bool { + let Self { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } = + self; + + code_regions.is_empty() + && branch_regions.is_empty() + && mcdc_branch_regions.is_empty() + && mcdc_decision_regions.is_empty() } } @@ -156,7 +168,7 @@ impl CoverageSpan { #[derive(Clone, Debug)] #[repr(C)] pub(crate) struct CodeRegion { - pub(crate) span: CoverageSpan, + pub(crate) cov_span: CoverageSpan, pub(crate) counter: Counter, } @@ -164,7 +176,7 @@ pub(crate) struct CodeRegion { #[derive(Clone, Debug)] #[repr(C)] pub(crate) struct BranchRegion { - pub(crate) span: CoverageSpan, + pub(crate) cov_span: CoverageSpan, pub(crate) true_counter: Counter, pub(crate) false_counter: Counter, } @@ -173,7 +185,7 @@ pub(crate) struct BranchRegion { #[derive(Clone, Debug)] #[repr(C)] pub(crate) struct MCDCBranchRegion { - pub(crate) span: CoverageSpan, + pub(crate) cov_span: CoverageSpan, pub(crate) true_counter: Counter, pub(crate) false_counter: Counter, pub(crate) mcdc_branch_params: mcdc::BranchParameters, @@ 
-183,6 +195,6 @@ pub(crate) struct MCDCBranchRegion { #[derive(Clone, Debug)] #[repr(C)] pub(crate) struct MCDCDecisionRegion { - pub(crate) span: CoverageSpan, + pub(crate) cov_span: CoverageSpan, pub(crate) mcdc_decision_params: mcdc::DecisionParameters, } diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs index 99c2d12b261..2cd7fa3225a 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs @@ -40,11 +40,10 @@ pub(crate) fn create_pgo_func_name_var<'ll>( } } -pub(crate) fn write_filenames_to_buffer<'a>( - filenames: impl IntoIterator<Item = &'a str>, -) -> Vec<u8> { +pub(crate) fn write_filenames_to_buffer(filenames: &[impl AsRef<str>]) -> Vec<u8> { let (pointers, lengths) = filenames .into_iter() + .map(AsRef::as_ref) .map(|s: &str| (s.as_c_char_ptr(), s.len())) .unzip::<_, _, Vec<_>, Vec<_>>(); @@ -62,11 +61,10 @@ pub(crate) fn write_filenames_to_buffer<'a>( pub(crate) fn write_function_mappings_to_buffer( virtual_file_mapping: &[u32], expressions: &[ffi::CounterExpression], - code_regions: &[ffi::CodeRegion], - branch_regions: &[ffi::BranchRegion], - mcdc_branch_regions: &[ffi::MCDCBranchRegion], - mcdc_decision_regions: &[ffi::MCDCDecisionRegion], + regions: &ffi::Regions, ) -> Vec<u8> { + let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } = + regions; llvm::build_byte_buffer(|buffer| unsafe { llvm::LLVMRustCoverageWriteFunctionMappingsToBuffer( virtual_file_mapping.as_ptr(), diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs deleted file mode 100644 index 0752c718c70..00000000000 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs +++ /dev/null @@ -1,232 +0,0 @@ -use rustc_data_structures::captures::Captures; -use rustc_data_structures::fx::FxIndexSet; -use 
rustc_index::bit_set::BitSet; -use rustc_middle::mir::CoverageIdsInfo; -use rustc_middle::mir::coverage::{ - CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, MappingKind, Op, - SourceRegion, -}; -use rustc_middle::ty::Instance; -use tracing::debug; - -use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind}; - -/// Holds all of the coverage mapping data associated with a function instance, -/// collected during traversal of `Coverage` statements in the function's MIR. -#[derive(Debug)] -pub(crate) struct FunctionCoverageCollector<'tcx> { - /// Coverage info that was attached to this function by the instrumentor. - function_coverage_info: &'tcx FunctionCoverageInfo, - ids_info: &'tcx CoverageIdsInfo, - is_used: bool, -} - -impl<'tcx> FunctionCoverageCollector<'tcx> { - /// Creates a new set of coverage data for a used (called) function. - pub(crate) fn new( - instance: Instance<'tcx>, - function_coverage_info: &'tcx FunctionCoverageInfo, - ids_info: &'tcx CoverageIdsInfo, - ) -> Self { - Self::create(instance, function_coverage_info, ids_info, true) - } - - /// Creates a new set of coverage data for an unused (never called) function. 
- pub(crate) fn unused( - instance: Instance<'tcx>, - function_coverage_info: &'tcx FunctionCoverageInfo, - ids_info: &'tcx CoverageIdsInfo, - ) -> Self { - Self::create(instance, function_coverage_info, ids_info, false) - } - - fn create( - instance: Instance<'tcx>, - function_coverage_info: &'tcx FunctionCoverageInfo, - ids_info: &'tcx CoverageIdsInfo, - is_used: bool, - ) -> Self { - let num_counters = function_coverage_info.num_counters; - let num_expressions = function_coverage_info.expressions.len(); - debug!( - "FunctionCoverage::create(instance={instance:?}) has \ - num_counters={num_counters}, num_expressions={num_expressions}, is_used={is_used}" - ); - - Self { function_coverage_info, ids_info, is_used } - } - - /// Identify expressions that will always have a value of zero, and note - /// their IDs in [`ZeroExpressions`]. Mappings that refer to a zero expression - /// can instead become mappings to a constant zero value. - /// - /// This method mainly exists to preserve the simplifications that were - /// already being performed by the Rust-side expression renumbering, so that - /// the resulting coverage mappings don't get worse. - fn identify_zero_expressions(&self) -> ZeroExpressions { - // The set of expressions that either were optimized out entirely, or - // have zero as both of their operands, and will therefore always have - // a value of zero. Other expressions that refer to these as operands - // can have those operands replaced with `CovTerm::Zero`. - let mut zero_expressions = ZeroExpressions::default(); - - // Simplify a copy of each expression based on lower-numbered expressions, - // and then update the set of always-zero expressions if necessary. - // (By construction, expressions can only refer to other expressions - // that have lower IDs, so one pass is sufficient.) 
- for (id, expression) in self.function_coverage_info.expressions.iter_enumerated() { - if !self.is_used || !self.ids_info.expressions_seen.contains(id) { - // If an expression was not seen, it must have been optimized away, - // so any operand that refers to it can be replaced with zero. - zero_expressions.insert(id); - continue; - } - - // We don't need to simplify the actual expression data in the - // expressions list; we can just simplify a temporary copy and then - // use that to update the set of always-zero expressions. - let Expression { mut lhs, op, mut rhs } = *expression; - - // If an expression has an operand that is also an expression, the - // operand's ID must be strictly lower. This is what lets us find - // all zero expressions in one pass. - let assert_operand_expression_is_lower = |operand_id: ExpressionId| { - assert!( - operand_id < id, - "Operand {operand_id:?} should be less than {id:?} in {expression:?}", - ) - }; - - // If an operand refers to a counter or expression that is always - // zero, then that operand can be replaced with `CovTerm::Zero`. - let maybe_set_operand_to_zero = |operand: &mut CovTerm| { - if let CovTerm::Expression(id) = *operand { - assert_operand_expression_is_lower(id); - } - - if is_zero_term(&self.ids_info.counters_seen, &zero_expressions, *operand) { - *operand = CovTerm::Zero; - } - }; - maybe_set_operand_to_zero(&mut lhs); - maybe_set_operand_to_zero(&mut rhs); - - // Coverage counter values cannot be negative, so if an expression - // involves subtraction from zero, assume that its RHS must also be zero. - // (Do this after simplifications that could set the LHS to zero.) - if lhs == CovTerm::Zero && op == Op::Subtract { - rhs = CovTerm::Zero; - } - - // After the above simplifications, if both operands are zero, then - // we know that this expression is always zero too. 
- if lhs == CovTerm::Zero && rhs == CovTerm::Zero { - zero_expressions.insert(id); - } - } - - zero_expressions - } - - pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> { - let zero_expressions = self.identify_zero_expressions(); - let FunctionCoverageCollector { function_coverage_info, ids_info, is_used, .. } = self; - - FunctionCoverage { function_coverage_info, ids_info, is_used, zero_expressions } - } -} - -pub(crate) struct FunctionCoverage<'tcx> { - pub(crate) function_coverage_info: &'tcx FunctionCoverageInfo, - ids_info: &'tcx CoverageIdsInfo, - is_used: bool, - - zero_expressions: ZeroExpressions, -} - -impl<'tcx> FunctionCoverage<'tcx> { - /// Returns true for a used (called) function, and false for an unused function. - pub(crate) fn is_used(&self) -> bool { - self.is_used - } - - /// Return the source hash, generated from the HIR node structure, and used to indicate whether - /// or not the source code structure changed between different compilations. - pub(crate) fn source_hash(&self) -> u64 { - if self.is_used { self.function_coverage_info.function_source_hash } else { 0 } - } - - /// Convert this function's coverage expression data into a form that can be - /// passed through FFI to LLVM. - pub(crate) fn counter_expressions( - &self, - ) -> impl Iterator<Item = CounterExpression> + ExactSizeIterator + Captures<'_> { - // We know that LLVM will optimize out any unused expressions before - // producing the final coverage map, so there's no need to do the same - // thing on the Rust side unless we're confident we can do much better. - // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.) 
- - self.function_coverage_info.expressions.iter().map(move |&Expression { lhs, op, rhs }| { - CounterExpression { - lhs: self.counter_for_term(lhs), - kind: match op { - Op::Add => ExprKind::Add, - Op::Subtract => ExprKind::Subtract, - }, - rhs: self.counter_for_term(rhs), - } - }) - } - - /// Converts this function's coverage mappings into an intermediate form - /// that will be used by `mapgen` when preparing for FFI. - pub(crate) fn counter_regions( - &self, - ) -> impl Iterator<Item = (MappingKind, &SourceRegion)> + ExactSizeIterator { - self.function_coverage_info.mappings.iter().map(move |mapping| { - let Mapping { kind, source_region } = mapping; - let kind = - kind.map_terms(|term| if self.is_zero_term(term) { CovTerm::Zero } else { term }); - (kind, source_region) - }) - } - - fn counter_for_term(&self, term: CovTerm) -> Counter { - if self.is_zero_term(term) { Counter::ZERO } else { Counter::from_term(term) } - } - - fn is_zero_term(&self, term: CovTerm) -> bool { - !self.is_used || is_zero_term(&self.ids_info.counters_seen, &self.zero_expressions, term) - } -} - -/// Set of expression IDs that are known to always evaluate to zero. -/// Any mapping or expression operand that refers to these expressions can have -/// that reference replaced with a constant zero value. -#[derive(Default)] -struct ZeroExpressions(FxIndexSet<ExpressionId>); - -impl ZeroExpressions { - fn insert(&mut self, id: ExpressionId) { - self.0.insert(id); - } - - fn contains(&self, id: ExpressionId) -> bool { - self.0.contains(&id) - } -} - -/// Returns `true` if the given term is known to have a value of zero, taking -/// into account knowledge of which counters are unused and which expressions -/// are always zero. 
-fn is_zero_term( - counters_seen: &BitSet<CounterId>, - zero_expressions: &ZeroExpressions, - term: CovTerm, -) -> bool { - match term { - CovTerm::Zero => true, - CovTerm::Counter(id) => !counters_seen.contains(id), - CovTerm::Expression(id) => zero_expressions.contains(id), - } -} diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index 8c24579fa7c..b3ad2a0e409 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -1,29 +1,29 @@ -use std::ffi::CString; -use std::iter; +use std::sync::Arc; -use itertools::Itertools as _; +use itertools::Itertools; use rustc_abi::Align; use rustc_codegen_ssa::traits::{ BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods, }; -use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet}; +use rustc_data_structures::fx::{FxHashSet, FxIndexMap}; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_index::IndexVec; -use rustc_middle::mir::coverage::MappingKind; +use rustc_middle::mir; use rustc_middle::ty::{self, TyCtxt}; -use rustc_middle::{bug, mir}; use rustc_session::RemapFileNameExt; use rustc_session::config::RemapPathScopeComponents; use rustc_span::def_id::DefIdSet; -use rustc_span::{Span, Symbol}; -use rustc_target::spec::HasTargetSpec; +use rustc_span::{SourceFile, StableSourceFileId}; use tracing::debug; use crate::common::CodegenCx; -use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector}; -use crate::coverageinfo::{ffi, llvm_cov}; +use crate::coverageinfo::llvm_cov; +use crate::coverageinfo::mapgen::covfun::prepare_covfun_record; use crate::llvm; +mod covfun; +mod spans; + /// Generates and exports the coverage map, which is embedded in special /// linker sections in the final binary. 
/// @@ -49,83 +49,62 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) { debug!("Generating coverage map for CodegenUnit: `{}`", cx.codegen_unit.name()); - // In order to show that unused functions have coverage counts of zero (0), LLVM requires the - // functions exist. Generate synthetic functions with a (required) single counter, and add the - // MIR `Coverage` code regions to the `function_coverage_map`, before calling - // `ctx.take_function_coverage_map()`. - if cx.codegen_unit.is_code_coverage_dead_code_cgu() { - add_unused_functions(cx); - } - // FIXME(#132395): Can this be none even when coverage is enabled? - let function_coverage_map = match cx.coverage_cx { - Some(ref cx) => cx.take_function_coverage_map(), + let instances_used = match cx.coverage_cx { + Some(ref cx) => cx.instances_used.borrow(), None => return, }; - if function_coverage_map.is_empty() { - // This module has no functions with coverage instrumentation - return; - } - let function_coverage_entries = function_coverage_map - .into_iter() - .map(|(instance, function_coverage)| (instance, function_coverage.into_finished())) - .collect::<Vec<_>>(); + // The order of entries in this global file table needs to be deterministic, + // and ideally should also be independent of the details of stable-hashing, + // because coverage tests snapshots (`.cov-map`) can observe the order and + // would need to be re-blessed if it changes. As long as those requirements + // are satisfied, the order can be arbitrary. + let mut global_file_table = GlobalFileTable::new(); - let all_file_names = function_coverage_entries + let mut covfun_records = instances_used .iter() - .map(|(_, fn_cov)| fn_cov.function_coverage_info.body_span) - .map(|span| span_file_name(tcx, span)); - let global_file_table = GlobalFileTable::new(all_file_names); - - // Encode all filenames referenced by coverage mappings in this CGU. 
- let filenames_buffer = global_file_table.make_filenames_buffer(tcx); - - let filenames_size = filenames_buffer.len(); - let filenames_val = cx.const_bytes(&filenames_buffer); - let filenames_ref = llvm_cov::hash_bytes(&filenames_buffer); - - // Generate the coverage map header, which contains the filenames used by - // this CGU's coverage mappings, and store it in a well-known global. - generate_covmap_record(cx, covmap_version, filenames_size, filenames_val); - - let mut unused_function_names = Vec::new(); + .copied() + // Sort by symbol name, so that the global file table is built in an + // order that doesn't depend on the stable-hash-based order in which + // instances were visited during codegen. + .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name) + .filter_map(|instance| prepare_covfun_record(tcx, &mut global_file_table, instance, true)) + .collect::<Vec<_>>(); - // Encode coverage mappings and generate function records - for (instance, function_coverage) in function_coverage_entries { - debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance); + // In a single designated CGU, also prepare covfun records for functions + // in this crate that were instrumented for coverage, but are unused. + if cx.codegen_unit.is_code_coverage_dead_code_cgu() { + let mut unused_instances = gather_unused_function_instances(cx); + // Sort the unused instances by symbol name, for the same reason as the used ones. + unused_instances.sort_by_cached_key(|&instance| tcx.symbol_name(instance).name); + covfun_records.extend(unused_instances.into_iter().filter_map(|instance| { + prepare_covfun_record(tcx, &mut global_file_table, instance, false) + })); + } - let mangled_function_name = tcx.symbol_name(instance).name; - let source_hash = function_coverage.source_hash(); - let is_used = function_coverage.is_used(); + // If there are no covfun records for this CGU, don't generate a covmap record. 
+ // Emitting a covmap record without any covfun records causes `llvm-cov` to + // fail when generating coverage reports, and if there are no covfun records + // then the covmap record isn't useful anyway. + // This should prevent a repeat of <https://github.com/rust-lang/rust/issues/133606>. + if covfun_records.is_empty() { + return; + } - let coverage_mapping_buffer = - encode_mappings_for_function(tcx, &global_file_table, &function_coverage); + // Encode all filenames referenced by coverage mappings in this CGU. + let filenames_buffer = global_file_table.make_filenames_buffer(tcx); + // The `llvm-cov` tool uses this hash to associate each covfun record with + // its corresponding filenames table, since the final binary will typically + // contain multiple covmap records from different compilation units. + let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer); - if coverage_mapping_buffer.is_empty() { - if function_coverage.is_used() { - bug!( - "A used function should have had coverage mapping data but did not: {}", - mangled_function_name - ); - } else { - debug!("unused function had no coverage mapping data: {}", mangled_function_name); - continue; - } - } + let mut unused_function_names = vec![]; - if !is_used { - unused_function_names.push(mangled_function_name); - } + for covfun in &covfun_records { + unused_function_names.extend(covfun.mangled_function_name_if_unused()); - generate_covfun_record( - cx, - mangled_function_name, - source_hash, - filenames_ref, - coverage_mapping_buffer, - is_used, - ); + covfun::generate_covfun_record(cx, filenames_hash, covfun) } // For unused functions, we need to take their mangled names and store them @@ -146,56 +125,58 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) { llvm::set_linkage(array, llvm::Linkage::InternalLinkage); llvm::set_initializer(array, initializer); } + + // Generate the coverage map header, which contains the filenames used by + // this CGU's coverage mappings, and store it in a well-known 
global. + // (This is skipped if we returned early due to having no covfun records.) + generate_covmap_record(cx, covmap_version, &filenames_buffer); } -/// Maps "global" (per-CGU) file ID numbers to their underlying filenames. +/// Maps "global" (per-CGU) file ID numbers to their underlying source files. struct GlobalFileTable { - /// This "raw" table doesn't include the working dir, so a filename's + /// This "raw" table doesn't include the working dir, so a file's /// global ID is its index in this set **plus one**. - raw_file_table: FxIndexSet<Symbol>, + raw_file_table: FxIndexMap<StableSourceFileId, Arc<SourceFile>>, } impl GlobalFileTable { - fn new(all_file_names: impl IntoIterator<Item = Symbol>) -> Self { - // Collect all of the filenames into a set. Filenames usually come in - // contiguous runs, so we can dedup adjacent ones to save work. - let mut raw_file_table = all_file_names.into_iter().dedup().collect::<FxIndexSet<Symbol>>(); - - // Sort the file table by its actual string values, not the arbitrary - // ordering of its symbols. - raw_file_table.sort_unstable_by(|a, b| a.as_str().cmp(b.as_str())); - - Self { raw_file_table } + fn new() -> Self { + Self { raw_file_table: FxIndexMap::default() } } - fn global_file_id_for_file_name(&self, file_name: Symbol) -> GlobalFileId { - let raw_id = self.raw_file_table.get_index_of(&file_name).unwrap_or_else(|| { - bug!("file name not found in prepared global file table: {file_name}"); - }); + fn global_file_id_for_file(&mut self, file: &Arc<SourceFile>) -> GlobalFileId { + // Ensure the given file has a table entry, and get its index. + let entry = self.raw_file_table.entry(file.stable_id); + let raw_id = entry.index(); + entry.or_insert_with(|| Arc::clone(file)); + // The raw file table doesn't include an entry for the working dir // (which has ID 0), so add 1 to get the correct ID. 
GlobalFileId::from_usize(raw_id + 1) } fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> { + let mut table = Vec::with_capacity(self.raw_file_table.len() + 1); + // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5) // requires setting the first filename to the compilation directory. // Since rustc generates coverage maps with relative paths, the // compilation directory can be combined with the relative paths // to get absolute paths, if needed. - use rustc_session::RemapFileNameExt; - use rustc_session::config::RemapPathScopeComponents; - let working_dir: &str = &tcx - .sess - .opts - .working_dir - .for_scope(tcx.sess, RemapPathScopeComponents::MACRO) - .to_string_lossy(); - - // Insert the working dir at index 0, before the other filenames. - let filenames = - iter::once(working_dir).chain(self.raw_file_table.iter().map(Symbol::as_str)); - llvm_cov::write_filenames_to_buffer(filenames) + table.push( + tcx.sess + .opts + .working_dir + .for_scope(tcx.sess, RemapPathScopeComponents::MACRO) + .to_string_lossy(), + ); + + // Add the regular entries after the base directory. + table.extend(self.raw_file_table.values().map(|file| { + file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy() + })); + + llvm_cov::write_filenames_to_buffer(&table) } } @@ -208,12 +189,12 @@ rustc_index::newtype_index! { /// An index into a function's list of global file IDs. That underlying list /// of local-to-global mappings will be embedded in the function's record in /// the `__llvm_covfun` linker section. - pub(crate) struct LocalFileId {} + struct LocalFileId {} } /// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU) /// file IDs. 
-#[derive(Default)] +#[derive(Debug, Default)] struct VirtualFileMapping { local_to_global: IndexVec<LocalFileId, GlobalFileId>, global_to_local: FxIndexMap<GlobalFileId, LocalFileId>, @@ -227,187 +208,45 @@ impl VirtualFileMapping { .or_insert_with(|| self.local_to_global.push(global_file_id)) } - fn into_vec(self) -> Vec<u32> { - // This conversion should be optimized away to ~zero overhead. - // In any case, it's probably not hot enough to worry about. - self.local_to_global.into_iter().map(|global| global.as_u32()).collect() - } -} - -fn span_file_name(tcx: TyCtxt<'_>, span: Span) -> Symbol { - let source_file = tcx.sess.source_map().lookup_source_file(span.lo()); - let name = - source_file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy(); - Symbol::intern(&name) -} - -/// Using the expressions and counter regions collected for a single function, -/// generate the variable-sized payload of its corresponding `__llvm_covfun` -/// entry. The payload is returned as a vector of bytes. -/// -/// Newly-encountered filenames will be added to the global file table. -fn encode_mappings_for_function( - tcx: TyCtxt<'_>, - global_file_table: &GlobalFileTable, - function_coverage: &FunctionCoverage<'_>, -) -> Vec<u8> { - let counter_regions = function_coverage.counter_regions(); - if counter_regions.is_empty() { - return Vec::new(); - } - - let expressions = function_coverage.counter_expressions().collect::<Vec<_>>(); - - let mut virtual_file_mapping = VirtualFileMapping::default(); - let mut code_regions = vec![]; - let mut branch_regions = vec![]; - let mut mcdc_branch_regions = vec![]; - let mut mcdc_decision_regions = vec![]; - - // Currently a function's mappings must all be in the same file as its body span. - let file_name = span_file_name(tcx, function_coverage.function_coverage_info.body_span); - - // Look up the global file ID for that filename. 
- let global_file_id = global_file_table.global_file_id_for_file_name(file_name); - - // Associate that global file ID with a local file ID for this function. - let local_file_id = virtual_file_mapping.local_id_for_global(global_file_id); - debug!(" file id: {local_file_id:?} => {global_file_id:?} = '{file_name:?}'"); - - // For each counter/region pair in this function+file, convert it to a - // form suitable for FFI. - for (mapping_kind, region) in counter_regions { - debug!("Adding counter {mapping_kind:?} to map for {region:?}"); - let span = ffi::CoverageSpan::from_source_region(local_file_id, region); - match mapping_kind { - MappingKind::Code(term) => { - code_regions.push(ffi::CodeRegion { span, counter: ffi::Counter::from_term(term) }); - } - MappingKind::Branch { true_term, false_term } => { - branch_regions.push(ffi::BranchRegion { - span, - true_counter: ffi::Counter::from_term(true_term), - false_counter: ffi::Counter::from_term(false_term), - }); - } - MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => { - mcdc_branch_regions.push(ffi::MCDCBranchRegion { - span, - true_counter: ffi::Counter::from_term(true_term), - false_counter: ffi::Counter::from_term(false_term), - mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params), - }); - } - MappingKind::MCDCDecision(mcdc_decision_params) => { - mcdc_decision_regions.push(ffi::MCDCDecisionRegion { - span, - mcdc_decision_params: ffi::mcdc::DecisionParameters::from(mcdc_decision_params), - }); - } - } + fn to_vec(&self) -> Vec<u32> { + // This clone could be avoided by transmuting `&[GlobalFileId]` to `&[u32]`, + // but it isn't hot or expensive enough to justify the extra unsafety. + self.local_to_global.iter().map(|&global| GlobalFileId::as_u32(global)).collect() } - - // Encode the function's coverage mappings into a buffer. 
- llvm_cov::write_function_mappings_to_buffer( - &virtual_file_mapping.into_vec(), - &expressions, - &code_regions, - &branch_regions, - &mcdc_branch_regions, - &mcdc_decision_regions, - ) } /// Generates the contents of the covmap record for this CGU, which mostly /// consists of a header and a list of filenames. The record is then stored /// as a global variable in the `__llvm_covmap` section. -fn generate_covmap_record<'ll>( - cx: &CodegenCx<'ll, '_>, - version: u32, - filenames_size: usize, - filenames_val: &'ll llvm::Value, -) { - debug!("cov map: filenames_size = {}, 0-based version = {}", filenames_size, version); - - // Create the coverage data header (Note, fields 0 and 2 are now always zero, - // as of `llvm::coverage::CovMapVersion::Version4`.) - let zero_was_n_records_val = cx.const_u32(0); - let filenames_size_val = cx.const_u32(filenames_size as u32); - let zero_was_coverage_size_val = cx.const_u32(0); - let version_val = cx.const_u32(version); - let cov_data_header_val = cx.const_struct( - &[zero_was_n_records_val, filenames_size_val, zero_was_coverage_size_val, version_val], - /*packed=*/ false, - ); - - // Create the complete LLVM coverage data value to add to the LLVM IR - let covmap_data = - cx.const_struct(&[cov_data_header_val, filenames_val], /*packed=*/ false); - - let llglobal = llvm::add_global(cx.llmod, cx.val_ty(covmap_data), &llvm_cov::covmap_var_name()); - llvm::set_initializer(llglobal, covmap_data); - llvm::set_global_constant(llglobal, true); - llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage); - llvm::set_section(llglobal, &llvm_cov::covmap_section_name(cx.llmod)); - // LLVM's coverage mapping format specifies 8-byte alignment for items in this section. - // <https://llvm.org/docs/CoverageMappingFormat.html> - llvm::set_alignment(llglobal, Align::EIGHT); - cx.add_used_global(llglobal); -} - -/// Generates the contents of the covfun record for this function, which -/// contains the function's coverage mapping data. 
The record is then stored -/// as a global variable in the `__llvm_covfun` section. -fn generate_covfun_record( - cx: &CodegenCx<'_, '_>, - mangled_function_name: &str, - source_hash: u64, - filenames_ref: u64, - coverage_mapping_buffer: Vec<u8>, - is_used: bool, -) { - // Concatenate the encoded coverage mappings - let coverage_mapping_size = coverage_mapping_buffer.len(); - let coverage_mapping_val = cx.const_bytes(&coverage_mapping_buffer); - - let func_name_hash = llvm_cov::hash_bytes(mangled_function_name.as_bytes()); - let func_name_hash_val = cx.const_u64(func_name_hash); - let coverage_mapping_size_val = cx.const_u32(coverage_mapping_size as u32); - let source_hash_val = cx.const_u64(source_hash); - let filenames_ref_val = cx.const_u64(filenames_ref); - let func_record_val = cx.const_struct( +fn generate_covmap_record<'ll>(cx: &CodegenCx<'ll, '_>, version: u32, filenames_buffer: &[u8]) { + // A covmap record consists of four target-endian u32 values, followed by + // the encoded filenames table. Two of the header fields are unused in + // modern versions of the LLVM coverage mapping format, and are always 0. + // <https://llvm.org/docs/CoverageMappingFormat.html#llvm-ir-representation> + // See also `src/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp`. + let covmap_header = cx.const_struct( &[ - func_name_hash_val, - coverage_mapping_size_val, - source_hash_val, - filenames_ref_val, - coverage_mapping_val, + cx.const_u32(0), // (unused) + cx.const_u32(filenames_buffer.len() as u32), + cx.const_u32(0), // (unused) + cx.const_u32(version), ], - /*packed=*/ true, + /* packed */ false, ); - - // Choose a variable name to hold this function's covfun data. - // Functions that are used have a suffix ("u") to distinguish them from - // unused copies of the same function (from different CGUs), so that if a - // linker sees both it won't discard the used copy's data. 
- let func_record_var_name = - CString::new(format!("__covrec_{:X}{}", func_name_hash, if is_used { "u" } else { "" })) - .unwrap(); - debug!("function record var name: {:?}", func_record_var_name); - - let llglobal = llvm::add_global(cx.llmod, cx.val_ty(func_record_val), &func_record_var_name); - llvm::set_initializer(llglobal, func_record_val); - llvm::set_global_constant(llglobal, true); - llvm::set_linkage(llglobal, llvm::Linkage::LinkOnceODRLinkage); - llvm::set_visibility(llglobal, llvm::Visibility::Hidden); - llvm::set_section(llglobal, cx.covfun_section_name()); + let covmap_record = cx + .const_struct(&[covmap_header, cx.const_bytes(filenames_buffer)], /* packed */ false); + + let covmap_global = + llvm::add_global(cx.llmod, cx.val_ty(covmap_record), &llvm_cov::covmap_var_name()); + llvm::set_initializer(covmap_global, covmap_record); + llvm::set_global_constant(covmap_global, true); + llvm::set_linkage(covmap_global, llvm::Linkage::PrivateLinkage); + llvm::set_section(covmap_global, &llvm_cov::covmap_section_name(cx.llmod)); // LLVM's coverage mapping format specifies 8-byte alignment for items in this section. // <https://llvm.org/docs/CoverageMappingFormat.html> - llvm::set_alignment(llglobal, Align::EIGHT); - if cx.target_spec().supports_comdat() { - llvm::set_comdat(cx.llmod, llglobal, &func_record_var_name); - } - cx.add_used_global(llglobal); + llvm::set_alignment(covmap_global, Align::EIGHT); + + cx.add_used_global(covmap_global); } /// Each CGU will normally only emit coverage metadata for the functions that it actually generates. @@ -418,39 +257,35 @@ fn generate_covfun_record( /// coverage map (in a single designated CGU) so that we still emit coverage mappings for them. /// We also end up adding their symbol names to a special global array that LLVM will include in /// its embedded coverage data. 
-fn add_unused_functions(cx: &CodegenCx<'_, '_>) { +fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<ty::Instance<'tcx>> { assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); let tcx = cx.tcx; let usage = prepare_usage_sets(tcx); let is_unused_fn = |def_id: LocalDefId| -> bool { - let def_id = def_id.to_def_id(); - - // To be eligible for "unused function" mappings, a definition must: - // - Be function-like + // Usage sets expect `DefId`, so convert from `LocalDefId`. + let d: DefId = LocalDefId::to_def_id(def_id); + // To be potentially eligible for "unused function" mappings, a definition must: + // - Be eligible for coverage instrumentation // - Not participate directly in codegen (or have lost all its coverage statements) // - Not have any coverage statements inlined into codegenned functions - tcx.def_kind(def_id).is_fn_like() - && (!usage.all_mono_items.contains(&def_id) - || usage.missing_own_coverage.contains(&def_id)) - && !usage.used_via_inlining.contains(&def_id) + tcx.is_eligible_for_coverage(def_id) + && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d)) + && !usage.used_via_inlining.contains(&d) }; - // Scan for unused functions that were instrumented for coverage. - for def_id in tcx.mir_keys(()).iter().copied().filter(|&def_id| is_unused_fn(def_id)) { - // Get the coverage info from MIR, skipping functions that were never instrumented. - let body = tcx.optimized_mir(def_id); - let Some(function_coverage_info) = body.function_coverage_info.as_deref() else { continue }; + // FIXME(#79651): Consider trying to filter out dummy instantiations of + // unused generic functions from library crates, because they can produce + // "unused instantiation" in coverage reports even when they are actually + // used by some downstream crate in the same binary. 
- // FIXME(79651): Consider trying to filter out dummy instantiations of - // unused generic functions from library crates, because they can produce - // "unused instantiation" in coverage reports even when they are actually - // used by some downstream crate in the same binary. - - debug!("generating unused fn: {def_id:?}"); - add_unused_function_coverage(cx, def_id, function_coverage_info); - } + tcx.mir_keys(()) + .iter() + .copied() + .filter(|&def_id| is_unused_fn(def_id)) + .map(|def_id| make_dummy_instance(tcx, def_id)) + .collect::<Vec<_>>() } struct UsageSets<'tcx> { @@ -515,16 +350,11 @@ fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> { UsageSets { all_mono_items, used_via_inlining, missing_own_coverage } } -fn add_unused_function_coverage<'tcx>( - cx: &CodegenCx<'_, 'tcx>, - def_id: LocalDefId, - function_coverage_info: &'tcx mir::coverage::FunctionCoverageInfo, -) { - let tcx = cx.tcx; - let def_id = def_id.to_def_id(); +fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> { + let def_id = local_def_id.to_def_id(); // Make a dummy instance that fills in all generics with placeholders. - let instance = ty::Instance::new( + ty::Instance::new( def_id, ty::GenericArgs::for_item(tcx, def_id, |param, _| { if let ty::GenericParamDefKind::Lifetime = param.kind { @@ -533,14 +363,5 @@ fn add_unused_function_coverage<'tcx>( tcx.mk_param_from_def(param) } }), - ); - - // An unused function's mappings will all be rewritten to map to zero. 
- let function_coverage = FunctionCoverageCollector::unused( - instance, - function_coverage_info, - tcx.coverage_ids_info(instance.def), - ); - - cx.coverage_cx().function_coverage_map.borrow_mut().insert(instance, function_coverage); + ) } diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs new file mode 100644 index 00000000000..5428d776f41 --- /dev/null +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs @@ -0,0 +1,252 @@ +//! For each function that was instrumented for coverage, we need to embed its +//! corresponding coverage mapping metadata inside the `__llvm_covfun`[^win] +//! linker section of the final binary. +//! +//! [^win]: On Windows the section name is `.lcovfun`. + +use std::ffi::CString; + +use rustc_abi::Align; +use rustc_codegen_ssa::traits::{ + BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods, +}; +use rustc_middle::mir::coverage::{ + CovTerm, CoverageIdsInfo, Expression, FunctionCoverageInfo, Mapping, MappingKind, Op, +}; +use rustc_middle::ty::{Instance, TyCtxt}; +use rustc_span::Span; +use rustc_target::spec::HasTargetSpec; +use tracing::debug; + +use crate::common::CodegenCx; +use crate::coverageinfo::mapgen::{GlobalFileTable, VirtualFileMapping, spans}; +use crate::coverageinfo::{ffi, llvm_cov}; +use crate::llvm; + +/// Intermediate coverage metadata for a single function, used to help build +/// the final record that will be embedded in the `__llvm_covfun` section. +#[derive(Debug)] +pub(crate) struct CovfunRecord<'tcx> { + mangled_function_name: &'tcx str, + source_hash: u64, + is_used: bool, + + virtual_file_mapping: VirtualFileMapping, + expressions: Vec<ffi::CounterExpression>, + regions: ffi::Regions, +} + +impl<'tcx> CovfunRecord<'tcx> { + /// FIXME(Zalathar): Make this the responsibility of the code that determines + /// which functions are unused. 
+ pub(crate) fn mangled_function_name_if_unused(&self) -> Option<&'tcx str> { + (!self.is_used).then_some(self.mangled_function_name) + } +} + +pub(crate) fn prepare_covfun_record<'tcx>( + tcx: TyCtxt<'tcx>, + global_file_table: &mut GlobalFileTable, + instance: Instance<'tcx>, + is_used: bool, +) -> Option<CovfunRecord<'tcx>> { + let fn_cov_info = tcx.instance_mir(instance.def).function_coverage_info.as_deref()?; + let ids_info = tcx.coverage_ids_info(instance.def); + + let expressions = prepare_expressions(fn_cov_info, ids_info, is_used); + + let mut covfun = CovfunRecord { + mangled_function_name: tcx.symbol_name(instance).name, + source_hash: if is_used { fn_cov_info.function_source_hash } else { 0 }, + is_used, + virtual_file_mapping: VirtualFileMapping::default(), + expressions, + regions: ffi::Regions::default(), + }; + + fill_region_tables(tcx, global_file_table, fn_cov_info, ids_info, &mut covfun); + + if covfun.regions.has_no_regions() { + debug!(?covfun, "function has no mappings to embed; skipping"); + return None; + } + + Some(covfun) +} + +/// Convert the function's coverage-counter expressions into a form suitable for FFI. +fn prepare_expressions( + fn_cov_info: &FunctionCoverageInfo, + ids_info: &CoverageIdsInfo, + is_used: bool, +) -> Vec<ffi::CounterExpression> { + // If any counters or expressions were removed by MIR opts, replace their + // terms with zero. + let counter_for_term = |term| { + if !is_used || ids_info.is_zero_term(term) { + ffi::Counter::ZERO + } else { + ffi::Counter::from_term(term) + } + }; + + // We know that LLVM will optimize out any unused expressions before + // producing the final coverage map, so there's no need to do the same + // thing on the Rust side unless we're confident we can do much better. + // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.) 
+ fn_cov_info + .expressions + .iter() + .map(move |&Expression { lhs, op, rhs }| ffi::CounterExpression { + lhs: counter_for_term(lhs), + kind: match op { + Op::Add => ffi::ExprKind::Add, + Op::Subtract => ffi::ExprKind::Subtract, + }, + rhs: counter_for_term(rhs), + }) + .collect::<Vec<_>>() +} + +/// Populates the mapping region tables in the current function's covfun record. +fn fill_region_tables<'tcx>( + tcx: TyCtxt<'tcx>, + global_file_table: &mut GlobalFileTable, + fn_cov_info: &'tcx FunctionCoverageInfo, + ids_info: &'tcx CoverageIdsInfo, + covfun: &mut CovfunRecord<'tcx>, +) { + // Currently a function's mappings must all be in the same file as its body span. + let source_map = tcx.sess.source_map(); + let source_file = source_map.lookup_source_file(fn_cov_info.body_span.lo()); + + // Look up the global file ID for that file. + let global_file_id = global_file_table.global_file_id_for_file(&source_file); + + // Associate that global file ID with a local file ID for this function. + let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id); + + let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } = + &mut covfun.regions; + + let make_cov_span = |span: Span| { + spans::make_coverage_span(local_file_id, source_map, fn_cov_info, &source_file, span) + }; + let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen(); + + // For each counter/region pair in this function+file, convert it to a + // form suitable for FFI. + let is_zero_term = |term| !covfun.is_used || ids_info.is_zero_term(term); + for &Mapping { ref kind, span } in &fn_cov_info.mappings { + // If the mapping refers to counters/expressions that were removed by + // MIR opts, replace those occurrences with zero. + let kind = kind.map_terms(|term| if is_zero_term(term) { CovTerm::Zero } else { term }); + + // Convert the `Span` into coordinates that we can pass to LLVM, or + // discard the span if conversion fails. 
In rare cases, _all_ of a + // function's spans are discarded, and the rest of coverage codegen + // needs to handle that gracefully to avoid a repeat of #133606. + // We don't have a good test case for triggering that organically, so + // instead we set `-Zcoverage-options=discard-all-spans-in-codegen` + // to force it to occur. + let Some(cov_span) = make_cov_span(span) else { continue }; + if discard_all { + continue; + } + + match kind { + MappingKind::Code(term) => { + code_regions + .push(ffi::CodeRegion { cov_span, counter: ffi::Counter::from_term(term) }); + } + MappingKind::Branch { true_term, false_term } => { + branch_regions.push(ffi::BranchRegion { + cov_span, + true_counter: ffi::Counter::from_term(true_term), + false_counter: ffi::Counter::from_term(false_term), + }); + } + MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => { + mcdc_branch_regions.push(ffi::MCDCBranchRegion { + cov_span, + true_counter: ffi::Counter::from_term(true_term), + false_counter: ffi::Counter::from_term(false_term), + mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params), + }); + } + MappingKind::MCDCDecision(mcdc_decision_params) => { + mcdc_decision_regions.push(ffi::MCDCDecisionRegion { + cov_span, + mcdc_decision_params: ffi::mcdc::DecisionParameters::from(mcdc_decision_params), + }); + } + } + } +} + +/// Generates the contents of the covfun record for this function, which +/// contains the function's coverage mapping data. The record is then stored +/// as a global variable in the `__llvm_covfun` section. +pub(crate) fn generate_covfun_record<'tcx>( + cx: &CodegenCx<'_, 'tcx>, + filenames_hash: u64, + covfun: &CovfunRecord<'tcx>, +) { + let &CovfunRecord { + mangled_function_name, + source_hash, + is_used, + ref virtual_file_mapping, + ref expressions, + ref regions, + } = covfun; + + // Encode the function's coverage mappings into a buffer.
+ let coverage_mapping_buffer = llvm_cov::write_function_mappings_to_buffer( + &virtual_file_mapping.to_vec(), + expressions, + regions, + ); + + // A covfun record consists of four target-endian integers, followed by the + // encoded mapping data in bytes. Note that the length field is 32 bits. + // <https://llvm.org/docs/CoverageMappingFormat.html#llvm-ir-representation> + // See also `src/llvm-project/clang/lib/CodeGen/CoverageMappingGen.cpp` and + // `COVMAP_V3` in `src/llvm-project/llvm/include/llvm/ProfileData/InstrProfData.inc`. + let func_name_hash = llvm_cov::hash_bytes(mangled_function_name.as_bytes()); + let covfun_record = cx.const_struct( + &[ + cx.const_u64(func_name_hash), + cx.const_u32(coverage_mapping_buffer.len() as u32), + cx.const_u64(source_hash), + cx.const_u64(filenames_hash), + cx.const_bytes(&coverage_mapping_buffer), + ], + // This struct needs to be packed, so that the 32-bit length field + // doesn't have unexpected padding. + true, + ); + + // Choose a variable name to hold this function's covfun data. + // Functions that are used have a suffix ("u") to distinguish them from + // unused copies of the same function (from different CGUs), so that if a + // linker sees both it won't discard the used copy's data. + let u = if is_used { "u" } else { "" }; + let covfun_var_name = CString::new(format!("__covrec_{func_name_hash:X}{u}")).unwrap(); + debug!("function record var name: {covfun_var_name:?}"); + + let covfun_global = llvm::add_global(cx.llmod, cx.val_ty(covfun_record), &covfun_var_name); + llvm::set_initializer(covfun_global, covfun_record); + llvm::set_global_constant(covfun_global, true); + llvm::set_linkage(covfun_global, llvm::Linkage::LinkOnceODRLinkage); + llvm::set_visibility(covfun_global, llvm::Visibility::Hidden); + llvm::set_section(covfun_global, cx.covfun_section_name()); + // LLVM's coverage mapping format specifies 8-byte alignment for items in this section. 
+ // <https://llvm.org/docs/CoverageMappingFormat.html> + llvm::set_alignment(covfun_global, Align::EIGHT); + if cx.target_spec().supports_comdat() { + llvm::set_comdat(cx.llmod, covfun_global, &covfun_var_name); + } + + cx.add_used_global(covfun_global); +} diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs new file mode 100644 index 00000000000..6d1d91340c2 --- /dev/null +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs @@ -0,0 +1,126 @@ +use rustc_middle::mir::coverage::FunctionCoverageInfo; +use rustc_span::source_map::SourceMap; +use rustc_span::{BytePos, Pos, SourceFile, Span}; +use tracing::debug; + +use crate::coverageinfo::ffi; +use crate::coverageinfo::mapgen::LocalFileId; + +/// Converts the span into its start line and column, and end line and column. +/// +/// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by +/// the compiler, these column numbers are denoted in **bytes**, because that's what +/// LLVM's `llvm-cov` tool expects to see in coverage maps. +/// +/// Returns `None` if the conversion failed for some reason. This shouldn't happen, +/// but it's hard to rule out entirely (especially in the presence of complex macros +/// or other expansions), and if it does happen then skipping a span or function is +/// better than an ICE or `llvm-cov` failure that the user might have no way to avoid. +pub(crate) fn make_coverage_span( + file_id: LocalFileId, + source_map: &SourceMap, + fn_cov_info: &FunctionCoverageInfo, + file: &SourceFile, + span: Span, +) -> Option<ffi::CoverageSpan> { + let span = ensure_non_empty_span(source_map, fn_cov_info, span)?; + + let lo = span.lo(); + let hi = span.hi(); + + // Column numbers need to be in bytes, so we can't use the more convenient + // `SourceMap` methods for looking up file coordinates. 
+ let line_and_byte_column = |pos: BytePos| -> Option<(usize, usize)> { + let rpos = file.relative_position(pos); + let line_index = file.lookup_line(rpos)?; + let line_start = file.lines()[line_index]; + // Line numbers and column numbers are 1-based, so add 1 to each. + Some((line_index + 1, (rpos - line_start).to_usize() + 1)) + }; + + let (mut start_line, start_col) = line_and_byte_column(lo)?; + let (mut end_line, end_col) = line_and_byte_column(hi)?; + + // Apply an offset so that code in doctests has correct line numbers. + // FIXME(#79417): Currently we have no way to offset doctest _columns_. + start_line = source_map.doctest_offset_line(&file.name, start_line); + end_line = source_map.doctest_offset_line(&file.name, end_line); + + check_coverage_span(ffi::CoverageSpan { + file_id: file_id.as_u32(), + start_line: start_line as u32, + start_col: start_col as u32, + end_line: end_line as u32, + end_col: end_col as u32, + }) +} + +fn ensure_non_empty_span( + source_map: &SourceMap, + fn_cov_info: &FunctionCoverageInfo, + span: Span, +) -> Option<Span> { + if !span.is_empty() { + return Some(span); + } + + let lo = span.lo(); + let hi = span.hi(); + + // The span is empty, so try to expand it to cover an adjacent '{' or '}', + // but only within the bounds of the body span. + let try_next = hi < fn_cov_info.body_span.hi(); + let try_prev = fn_cov_info.body_span.lo() < lo; + if !(try_next || try_prev) { + return None; + } + + source_map + .span_to_source(span, |src, start, end| try { + // Adjusting span endpoints by `BytePos(1)` is normally a bug, + // but in this case we have specifically checked that the character + // we're skipping over is one of two specific ASCII characters, so + // adjusting by exactly 1 byte is correct. + if try_next && src.as_bytes()[end] == b'{' { + Some(span.with_hi(hi + BytePos(1))) + } else if try_prev && src.as_bytes()[start - 1] == b'}' { + Some(span.with_lo(lo - BytePos(1))) + } else { + None + } + }) + .ok()? 
+} + +/// If `llvm-cov` sees a source region that is improperly ordered (end < start), +/// it will immediately exit with a fatal error. To prevent that from happening, +/// discard regions that are improperly ordered, or might be interpreted in a +/// way that makes them improperly ordered. +fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> { + let ffi::CoverageSpan { file_id: _, start_line, start_col, end_line, end_col } = cov_span; + + // Line/column coordinates are supposed to be 1-based. If we ever emit + // coordinates of 0, `llvm-cov` might misinterpret them. + let all_nonzero = [start_line, start_col, end_line, end_col].into_iter().all(|x| x != 0); + // Coverage mappings use the high bit of `end_col` to indicate that a + // region is actually a "gap" region, so make sure it's unset. + let end_col_has_high_bit_unset = (end_col & (1 << 31)) == 0; + // If a region is improperly ordered (end < start), `llvm-cov` will exit + // with a fatal error, which is inconvenient for users and hard to debug. + let is_ordered = (start_line, start_col) <= (end_line, end_col); + + if all_nonzero && end_col_has_high_bit_unset && is_ordered { + Some(cov_span) + } else { + debug!( + ?cov_span, + ?all_nonzero, + ?end_col_has_high_bit_unset, + ?is_ordered, + "Skipping source region that would be misinterpreted or rejected by LLVM" + ); + // If this happens in a debug build, ICE to make it easier to notice. 
+ debug_assert!(false, "Improper source region: {cov_span:?}"); + None + } +} diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs index c2fcb33f98b..7311cd9d230 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs @@ -5,7 +5,7 @@ use rustc_abi::Size; use rustc_codegen_ssa::traits::{ BuilderMethods, ConstCodegenMethods, CoverageInfoBuilderMethods, MiscCodegenMethods, }; -use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; +use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; use rustc_middle::mir::coverage::CoverageKind; use rustc_middle::ty::Instance; use rustc_middle::ty::layout::HasTyCtxt; @@ -13,19 +13,16 @@ use tracing::{debug, instrument}; use crate::builder::Builder; use crate::common::CodegenCx; -use crate::coverageinfo::map_data::FunctionCoverageCollector; use crate::llvm; pub(crate) mod ffi; mod llvm_cov; -pub(crate) mod map_data; mod mapgen; /// Extra per-CGU context/state needed for coverage instrumentation. pub(crate) struct CguCoverageContext<'ll, 'tcx> { /// Coverage data for each instrumented function identified by DefId. 
- pub(crate) function_coverage_map: - RefCell<FxIndexMap<Instance<'tcx>, FunctionCoverageCollector<'tcx>>>, + pub(crate) instances_used: RefCell<FxIndexSet<Instance<'tcx>>>, pub(crate) pgo_func_name_var_map: RefCell<FxHashMap<Instance<'tcx>, &'ll llvm::Value>>, pub(crate) mcdc_condition_bitmap_map: RefCell<FxHashMap<Instance<'tcx>, Vec<&'ll llvm::Value>>>, @@ -35,19 +32,13 @@ pub(crate) struct CguCoverageContext<'ll, 'tcx> { impl<'ll, 'tcx> CguCoverageContext<'ll, 'tcx> { pub(crate) fn new() -> Self { Self { - function_coverage_map: Default::default(), + instances_used: RefCell::<FxIndexSet<_>>::default(), pgo_func_name_var_map: Default::default(), mcdc_condition_bitmap_map: Default::default(), covfun_section_name: Default::default(), } } - fn take_function_coverage_map( - &self, - ) -> FxIndexMap<Instance<'tcx>, FunctionCoverageCollector<'tcx>> { - self.function_coverage_map.replace(FxIndexMap::default()) - } - /// LLVM use a temp value to record evaluated mcdc test vector of each decision, which is /// called condition bitmap. In order to handle nested decisions, several condition bitmaps can /// be allocated for a function body. These values are named `mcdc.addr.{i}` and are a 32-bit @@ -160,13 +151,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { // Mark the instance as used in this CGU, for coverage purposes. // This includes functions that were not partitioned into this CGU, // but were MIR-inlined into one of this CGU's functions. - coverage_cx.function_coverage_map.borrow_mut().entry(instance).or_insert_with(|| { - FunctionCoverageCollector::new( - instance, - function_coverage_info, - bx.tcx.coverage_ids_info(instance.def), - ) - }); + coverage_cx.instances_used.borrow_mut().insert(instance); match *kind { CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!( |
