diff options
| author | Laurențiu Nicola <lnicola@dend.ro> | 2024-05-19 11:20:26 +0300 |
|---|---|---|
| committer | Laurențiu Nicola <lnicola@dend.ro> | 2024-05-19 11:20:26 +0300 |
| commit | 6ead205843eabf8121107b2f95c55be857f195e6 (patch) | |
| tree | ec4e9aabbf7b678dd04192d806e1ee7cc8987263 /compiler/rustc_codegen_llvm/src/coverageinfo | |
| parent | 2018426f569829bf5bb8866e16957eaef508f770 (diff) | |
| parent | 6579ed89f0fcc26da71afdd11d30d63f6f812a0a (diff) | |
| download | rust-6ead205843eabf8121107b2f95c55be857f195e6.tar.gz rust-6ead205843eabf8121107b2f95c55be857f195e6.zip | |
Merge from rust-lang/rust
Diffstat (limited to 'compiler/rustc_codegen_llvm/src/coverageinfo')
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs | 448 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs | 272 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs | 488 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs | 365 |
4 files changed, 1573 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs new file mode 100644 index 00000000000..12a846a49ec --- /dev/null +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs @@ -0,0 +1,448 @@ +use rustc_middle::mir::coverage::{ + CodeRegion, ConditionInfo, CounterId, CovTerm, DecisionInfo, ExpressionId, MappingKind, +}; + +/// Must match the layout of `LLVMRustCounterKind`. +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub enum CounterKind { + Zero = 0, + CounterValueReference = 1, + Expression = 2, +} + +/// A reference to an instance of an abstract "counter" that will yield a value in a coverage +/// report. Note that `id` has different interpretations, depending on the `kind`: +/// * For `CounterKind::Zero`, `id` is assumed to be `0` +/// * For `CounterKind::CounterValueReference`, `id` matches the `counter_id` of the injected +/// instrumentation counter (the `index` argument to the LLVM intrinsic +/// `instrprof.increment()`) +/// * For `CounterKind::Expression`, `id` is the index into the coverage map's array of +/// counter expressions. +/// +/// Corresponds to struct `llvm::coverage::Counter`. +/// +/// Must match the layout of `LLVMRustCounter`. +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct Counter { + // Important: The layout (order and types of fields) must match its C++ counterpart. + pub kind: CounterKind, + id: u32, +} + +impl Counter { + /// A `Counter` of kind `Zero`. For this counter kind, the `id` is not used. + pub(crate) const ZERO: Self = Self { kind: CounterKind::Zero, id: 0 }; + + /// Constructs a new `Counter` of kind `CounterValueReference`. + pub fn counter_value_reference(counter_id: CounterId) -> Self { + Self { kind: CounterKind::CounterValueReference, id: counter_id.as_u32() } + } + + /// Constructs a new `Counter` of kind `Expression`. + pub(crate) fn expression(expression_id: ExpressionId) -> Self { + Self { kind: CounterKind::Expression, id: expression_id.as_u32() } + } + + pub(crate) fn from_term(term: CovTerm) -> Self { + match term { + CovTerm::Zero => Self::ZERO, + CovTerm::Counter(id) => Self::counter_value_reference(id), + CovTerm::Expression(id) => Self::expression(id), + } + } +} + +/// Corresponds to enum `llvm::coverage::CounterExpression::ExprKind`. +/// +/// Must match the layout of `LLVMRustCounterExprKind`. +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub enum ExprKind { + Subtract = 0, + Add = 1, +} + +/// Corresponds to struct `llvm::coverage::CounterExpression`. +/// +/// Must match the layout of `LLVMRustCounterExpression`. +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct CounterExpression { + pub kind: ExprKind, + pub lhs: Counter, + pub rhs: Counter, +} + +/// Corresponds to enum `llvm::coverage::CounterMappingRegion::RegionKind`. +/// +/// Must match the layout of `LLVMRustCounterMappingRegionKind`. +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub enum RegionKind { + /// A CodeRegion associates some code with a counter + CodeRegion = 0, + + /// An ExpansionRegion represents a file expansion region that associates + /// a source range with the expansion of a virtual source file, such as + /// for a macro instantiation or #include file. + ExpansionRegion = 1, + + /// A SkippedRegion represents a source range with code that was skipped + /// by a preprocessor or similar means. + SkippedRegion = 2, + + /// A GapRegion is like a CodeRegion, but its count is only set as the + /// line execution count when its the only region in the line. + GapRegion = 3, + + /// A BranchRegion represents leaf-level boolean expressions and is + /// associated with two counters, each representing the number of times the + /// expression evaluates to true or false. + BranchRegion = 4, + + /// A DecisionRegion represents a top-level boolean expression and is + /// associated with a variable length bitmap index and condition number. + MCDCDecisionRegion = 5, + + /// A Branch Region can be extended to include IDs to facilitate MC/DC. + MCDCBranchRegion = 6, +} + +pub mod mcdc { + use rustc_middle::mir::coverage::{ConditionInfo, DecisionInfo}; + + /// Must match the layout of `LLVMRustMCDCDecisionParameters`. + #[repr(C)] + #[derive(Clone, Copy, Debug, Default)] + pub struct DecisionParameters { + bitmap_idx: u32, + conditions_num: u16, + } + + // ConditionId in llvm is `unsigned int` at 18 while `int16_t` at [19](https://github.com/llvm/llvm-project/pull/81257) + type LLVMConditionId = i16; + + /// Must match the layout of `LLVMRustMCDCBranchParameters`. + #[repr(C)] + #[derive(Clone, Copy, Debug, Default)] + pub struct BranchParameters { + condition_id: LLVMConditionId, + condition_ids: [LLVMConditionId; 2], + } + + #[repr(C)] + #[derive(Clone, Copy, Debug)] + pub enum ParameterTag { + None = 0, + Decision = 1, + Branch = 2, + } + /// Same layout with `LLVMRustMCDCParameters` + #[repr(C)] + #[derive(Clone, Copy, Debug)] + pub struct Parameters { + tag: ParameterTag, + decision_params: DecisionParameters, + branch_params: BranchParameters, + } + + impl Parameters { + pub fn none() -> Self { + Self { + tag: ParameterTag::None, + decision_params: Default::default(), + branch_params: Default::default(), + } + } + pub fn decision(decision_params: DecisionParameters) -> Self { + Self { tag: ParameterTag::Decision, decision_params, branch_params: Default::default() } + } + pub fn branch(branch_params: BranchParameters) -> Self { + Self { tag: ParameterTag::Branch, decision_params: Default::default(), branch_params } + } + } + + impl From<ConditionInfo> for BranchParameters { + fn from(value: ConditionInfo) -> Self { + Self { + condition_id: value.condition_id.as_u32() as LLVMConditionId, + condition_ids: [ + value.false_next_id.as_u32() as LLVMConditionId, + value.true_next_id.as_u32() as LLVMConditionId, + ], + } + } + } + + impl From<DecisionInfo> for DecisionParameters { + fn from(value: DecisionInfo) -> Self { + Self { bitmap_idx: value.bitmap_idx, conditions_num: value.conditions_num } + } + } +} + +/// This struct provides LLVM's representation of a "CoverageMappingRegion", encoded into the +/// coverage map, in accordance with the +/// [LLVM Code Coverage Mapping Format](https://github.com/rust-lang/llvm-project/blob/rustc/13.0-2021-09-30/llvm/docs/CoverageMappingFormat.rst#llvm-code-coverage-mapping-format). +/// The struct composes fields representing the `Counter` type and value(s) (injected counter +/// ID, or expression type and operands), the source file (an indirect index into a "filenames +/// array", encoded separately), and source location (start and end positions of the represented +/// code region). +/// +/// Corresponds to struct `llvm::coverage::CounterMappingRegion`. +/// +/// Must match the layout of `LLVMRustCounterMappingRegion`. +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct CounterMappingRegion { + /// The counter type and type-dependent counter data, if any. + counter: Counter, + + /// If the `RegionKind` is a `BranchRegion`, this represents the counter + /// for the false branch of the region. + false_counter: Counter, + + mcdc_params: mcdc::Parameters, + /// An indirect reference to the source filename. In the LLVM Coverage Mapping Format, the + /// file_id is an index into a function-specific `virtual_file_mapping` array of indexes + /// that, in turn, are used to look up the filename for this region. + file_id: u32, + + /// If the `RegionKind` is an `ExpansionRegion`, the `expanded_file_id` can be used to find + /// the mapping regions created as a result of macro expansion, by checking if their file id + /// matches the expanded file id. + expanded_file_id: u32, + + /// 1-based starting line of the mapping region. + start_line: u32, + + /// 1-based starting column of the mapping region. + start_col: u32, + + /// 1-based ending line of the mapping region. + end_line: u32, + + /// 1-based ending column of the mapping region. If the high bit is set, the current + /// mapping region is a gap area. + end_col: u32, + + kind: RegionKind, +} + +impl CounterMappingRegion { + pub(crate) fn from_mapping( + mapping_kind: &MappingKind, + local_file_id: u32, + code_region: &CodeRegion, + ) -> Self { + let &CodeRegion { file_name: _, start_line, start_col, end_line, end_col } = code_region; + match *mapping_kind { + MappingKind::Code(term) => Self::code_region( + Counter::from_term(term), + local_file_id, + start_line, + start_col, + end_line, + end_col, + ), + MappingKind::Branch { true_term, false_term } => Self::branch_region( + Counter::from_term(true_term), + Counter::from_term(false_term), + local_file_id, + start_line, + start_col, + end_line, + end_col, + ), + MappingKind::MCDCBranch { true_term, false_term, mcdc_params } => { + Self::mcdc_branch_region( + Counter::from_term(true_term), + Counter::from_term(false_term), + mcdc_params, + local_file_id, + start_line, + start_col, + end_line, + end_col, + ) + } + MappingKind::MCDCDecision(decision_info) => Self::decision_region( + decision_info, + local_file_id, + start_line, + start_col, + end_line, + end_col, + ), + } + } + + pub(crate) fn code_region( + counter: Counter, + file_id: u32, + start_line: u32, + start_col: u32, + end_line: u32, + end_col: u32, + ) -> Self { + Self { + counter, + false_counter: Counter::ZERO, + mcdc_params: mcdc::Parameters::none(), + file_id, + expanded_file_id: 0, + start_line, + start_col, + end_line, + end_col, + kind: RegionKind::CodeRegion, + } + } + + pub(crate) fn branch_region( + counter: Counter, + false_counter: Counter, + file_id: u32, + start_line: u32, + start_col: u32, + end_line: u32, + end_col: u32, + ) -> Self { + Self { + counter, + false_counter, + mcdc_params: mcdc::Parameters::none(), + file_id, + expanded_file_id: 0, + start_line, + start_col, + end_line, + end_col, + kind: RegionKind::BranchRegion, + } + } + + pub(crate) fn mcdc_branch_region( + counter: Counter, + false_counter: Counter, + condition_info: ConditionInfo, + file_id: u32, + start_line: u32, + start_col: u32, + end_line: u32, + end_col: u32, + ) -> Self { + Self { + counter, + false_counter, + mcdc_params: mcdc::Parameters::branch(condition_info.into()), + file_id, + expanded_file_id: 0, + start_line, + start_col, + end_line, + end_col, + kind: RegionKind::MCDCBranchRegion, + } + } + + pub(crate) fn decision_region( + decision_info: DecisionInfo, + file_id: u32, + start_line: u32, + start_col: u32, + end_line: u32, + end_col: u32, + ) -> Self { + let mcdc_params = mcdc::Parameters::decision(decision_info.into()); + + Self { + counter: Counter::ZERO, + false_counter: Counter::ZERO, + mcdc_params, + file_id, + expanded_file_id: 0, + start_line, + start_col, + end_line, + end_col, + kind: RegionKind::MCDCDecisionRegion, + } + } + + // This function might be used in the future; the LLVM API is still evolving, as is coverage + // support. + #[allow(dead_code)] + pub(crate) fn expansion_region( + file_id: u32, + expanded_file_id: u32, + start_line: u32, + start_col: u32, + end_line: u32, + end_col: u32, + ) -> Self { + Self { + counter: Counter::ZERO, + false_counter: Counter::ZERO, + mcdc_params: mcdc::Parameters::none(), + file_id, + expanded_file_id, + start_line, + start_col, + end_line, + end_col, + kind: RegionKind::ExpansionRegion, + } + } + + // This function might be used in the future; the LLVM API is still evolving, as is coverage + // support. + #[allow(dead_code)] + pub(crate) fn skipped_region( + file_id: u32, + start_line: u32, + start_col: u32, + end_line: u32, + end_col: u32, + ) -> Self { + Self { + counter: Counter::ZERO, + false_counter: Counter::ZERO, + mcdc_params: mcdc::Parameters::none(), + file_id, + expanded_file_id: 0, + start_line, + start_col, + end_line, + end_col, + kind: RegionKind::SkippedRegion, + } + } + + // This function might be used in the future; the LLVM API is still evolving, as is coverage + // support. + #[allow(dead_code)] + pub(crate) fn gap_region( + counter: Counter, + file_id: u32, + start_line: u32, + start_col: u32, + end_line: u32, + end_col: u32, + ) -> Self { + Self { + counter, + false_counter: Counter::ZERO, + mcdc_params: mcdc::Parameters::none(), + file_id, + expanded_file_id: 0, + start_line, + start_col, + end_line, + end_col: (1_u32 << 31) | end_col, + kind: RegionKind::GapRegion, + } + } +} diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs new file mode 100644 index 00000000000..d85d9411f03 --- /dev/null +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs @@ -0,0 +1,272 @@ +use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind}; + +use rustc_data_structures::captures::Captures; +use rustc_data_structures::fx::FxIndexSet; +use rustc_index::bit_set::BitSet; +use rustc_middle::mir::coverage::{ + CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, + MappingKind, Op, +}; +use rustc_middle::ty::Instance; +use rustc_span::Symbol; + +/// Holds all of the coverage mapping data associated with a function instance, +/// collected during traversal of `Coverage` statements in the function's MIR. +#[derive(Debug)] +pub struct FunctionCoverageCollector<'tcx> { + /// Coverage info that was attached to this function by the instrumentor. + function_coverage_info: &'tcx FunctionCoverageInfo, + is_used: bool, + + /// Tracks which counters have been seen, so that we can identify mappings + /// to counters that were optimized out, and set them to zero. + counters_seen: BitSet<CounterId>, + /// Contains all expression IDs that have been seen in an `ExpressionUsed` + /// coverage statement, plus all expression IDs that aren't directly used + /// by any mappings (and therefore do not have expression-used statements). + /// After MIR traversal is finished, we can conclude that any IDs missing + /// from this set must have had their statements deleted by MIR opts. + expressions_seen: BitSet<ExpressionId>, +} + +impl<'tcx> FunctionCoverageCollector<'tcx> { + /// Creates a new set of coverage data for a used (called) function. + pub fn new( + instance: Instance<'tcx>, + function_coverage_info: &'tcx FunctionCoverageInfo, + ) -> Self { + Self::create(instance, function_coverage_info, true) + } + + /// Creates a new set of coverage data for an unused (never called) function. + pub fn unused( + instance: Instance<'tcx>, + function_coverage_info: &'tcx FunctionCoverageInfo, + ) -> Self { + Self::create(instance, function_coverage_info, false) + } + + fn create( + instance: Instance<'tcx>, + function_coverage_info: &'tcx FunctionCoverageInfo, + is_used: bool, + ) -> Self { + let num_counters = function_coverage_info.num_counters; + let num_expressions = function_coverage_info.expressions.len(); + debug!( + "FunctionCoverage::create(instance={instance:?}) has \ + num_counters={num_counters}, num_expressions={num_expressions}, is_used={is_used}" + ); + + // Create a filled set of expression IDs, so that expressions not + // directly used by mappings will be treated as "seen". + // (If they end up being unused, LLVM will delete them for us.) + let mut expressions_seen = BitSet::new_filled(num_expressions); + // For each expression ID that is directly used by one or more mappings, + // mark it as not-yet-seen. This indicates that we expect to see a + // corresponding `ExpressionUsed` statement during MIR traversal. + for term in function_coverage_info.mappings.iter().flat_map(|m| m.kind.terms()) { + if let CovTerm::Expression(id) = term { + expressions_seen.remove(id); + } + } + + Self { + function_coverage_info, + is_used, + counters_seen: BitSet::new_empty(num_counters), + expressions_seen, + } + } + + /// Marks a counter ID as having been seen in a counter-increment statement. + #[instrument(level = "debug", skip(self))] + pub(crate) fn mark_counter_id_seen(&mut self, id: CounterId) { + self.counters_seen.insert(id); + } + + /// Marks an expression ID as having been seen in an expression-used statement. + #[instrument(level = "debug", skip(self))] + pub(crate) fn mark_expression_id_seen(&mut self, id: ExpressionId) { + self.expressions_seen.insert(id); + } + + /// Identify expressions that will always have a value of zero, and note + /// their IDs in [`ZeroExpressions`]. Mappings that refer to a zero expression + /// can instead become mappings to a constant zero value. + /// + /// This method mainly exists to preserve the simplifications that were + /// already being performed by the Rust-side expression renumbering, so that + /// the resulting coverage mappings don't get worse. + fn identify_zero_expressions(&self) -> ZeroExpressions { + // The set of expressions that either were optimized out entirely, or + // have zero as both of their operands, and will therefore always have + // a value of zero. Other expressions that refer to these as operands + // can have those operands replaced with `CovTerm::Zero`. + let mut zero_expressions = ZeroExpressions::default(); + + // Simplify a copy of each expression based on lower-numbered expressions, + // and then update the set of always-zero expressions if necessary. + // (By construction, expressions can only refer to other expressions + // that have lower IDs, so one pass is sufficient.) + for (id, expression) in self.function_coverage_info.expressions.iter_enumerated() { + if !self.expressions_seen.contains(id) { + // If an expression was not seen, it must have been optimized away, + // so any operand that refers to it can be replaced with zero. + zero_expressions.insert(id); + continue; + } + + // We don't need to simplify the actual expression data in the + // expressions list; we can just simplify a temporary copy and then + // use that to update the set of always-zero expressions. + let Expression { mut lhs, op, mut rhs } = *expression; + + // If an expression has an operand that is also an expression, the + // operand's ID must be strictly lower. This is what lets us find + // all zero expressions in one pass. + let assert_operand_expression_is_lower = |operand_id: ExpressionId| { + assert!( + operand_id < id, + "Operand {operand_id:?} should be less than {id:?} in {expression:?}", + ) + }; + + // If an operand refers to a counter or expression that is always + // zero, then that operand can be replaced with `CovTerm::Zero`. + let maybe_set_operand_to_zero = |operand: &mut CovTerm| { + if let CovTerm::Expression(id) = *operand { + assert_operand_expression_is_lower(id); + } + + if is_zero_term(&self.counters_seen, &zero_expressions, *operand) { + *operand = CovTerm::Zero; + } + }; + maybe_set_operand_to_zero(&mut lhs); + maybe_set_operand_to_zero(&mut rhs); + + // Coverage counter values cannot be negative, so if an expression + // involves subtraction from zero, assume that its RHS must also be zero. + // (Do this after simplifications that could set the LHS to zero.) + if lhs == CovTerm::Zero && op == Op::Subtract { + rhs = CovTerm::Zero; + } + + // After the above simplifications, if both operands are zero, then + // we know that this expression is always zero too. + if lhs == CovTerm::Zero && rhs == CovTerm::Zero { + zero_expressions.insert(id); + } + } + + zero_expressions + } + + pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> { + let zero_expressions = self.identify_zero_expressions(); + let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } = self; + + FunctionCoverage { function_coverage_info, is_used, counters_seen, zero_expressions } + } +} + +pub(crate) struct FunctionCoverage<'tcx> { + function_coverage_info: &'tcx FunctionCoverageInfo, + is_used: bool, + + counters_seen: BitSet<CounterId>, + zero_expressions: ZeroExpressions, +} + +impl<'tcx> FunctionCoverage<'tcx> { + /// Returns true for a used (called) function, and false for an unused function. + pub(crate) fn is_used(&self) -> bool { + self.is_used + } + + /// Return the source hash, generated from the HIR node structure, and used to indicate whether + /// or not the source code structure changed between different compilations. + pub fn source_hash(&self) -> u64 { + if self.is_used { self.function_coverage_info.function_source_hash } else { 0 } + } + + /// Returns an iterator over all filenames used by this function's mappings. + pub(crate) fn all_file_names(&self) -> impl Iterator<Item = Symbol> + Captures<'_> { + self.function_coverage_info.mappings.iter().map(|mapping| mapping.code_region.file_name) + } + + /// Convert this function's coverage expression data into a form that can be + /// passed through FFI to LLVM. + pub(crate) fn counter_expressions( + &self, + ) -> impl Iterator<Item = CounterExpression> + ExactSizeIterator + Captures<'_> { + // We know that LLVM will optimize out any unused expressions before + // producing the final coverage map, so there's no need to do the same + // thing on the Rust side unless we're confident we can do much better. + // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.) + + self.function_coverage_info.expressions.iter().map(move |&Expression { lhs, op, rhs }| { + CounterExpression { + lhs: self.counter_for_term(lhs), + kind: match op { + Op::Add => ExprKind::Add, + Op::Subtract => ExprKind::Subtract, + }, + rhs: self.counter_for_term(rhs), + } + }) + } + + /// Converts this function's coverage mappings into an intermediate form + /// that will be used by `mapgen` when preparing for FFI. + pub(crate) fn counter_regions( + &self, + ) -> impl Iterator<Item = (MappingKind, &CodeRegion)> + ExactSizeIterator { + self.function_coverage_info.mappings.iter().map(move |mapping| { + let Mapping { kind, code_region } = mapping; + let kind = + kind.map_terms(|term| if self.is_zero_term(term) { CovTerm::Zero } else { term }); + (kind, code_region) + }) + } + + fn counter_for_term(&self, term: CovTerm) -> Counter { + if self.is_zero_term(term) { Counter::ZERO } else { Counter::from_term(term) } + } + + fn is_zero_term(&self, term: CovTerm) -> bool { + is_zero_term(&self.counters_seen, &self.zero_expressions, term) + } +} + +/// Set of expression IDs that are known to always evaluate to zero. +/// Any mapping or expression operand that refers to these expressions can have +/// that reference replaced with a constant zero value. +#[derive(Default)] +struct ZeroExpressions(FxIndexSet<ExpressionId>); + +impl ZeroExpressions { + fn insert(&mut self, id: ExpressionId) { + self.0.insert(id); + } + + fn contains(&self, id: ExpressionId) -> bool { + self.0.contains(&id) + } +} + +/// Returns `true` if the given term is known to have a value of zero, taking +/// into account knowledge of which counters are unused and which expressions +/// are always zero. +fn is_zero_term( + counters_seen: &BitSet<CounterId>, + zero_expressions: &ZeroExpressions, + term: CovTerm, +) -> bool { + match term { + CovTerm::Zero => true, + CovTerm::Counter(id) => !counters_seen.contains(id), + CovTerm::Expression(id) => zero_expressions.contains(id), + } +} diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs new file mode 100644 index 00000000000..3f3969bbca3 --- /dev/null +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -0,0 +1,488 @@ +use crate::common::CodegenCx; +use crate::coverageinfo; +use crate::coverageinfo::ffi::CounterMappingRegion; +use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector}; +use crate::llvm; + +use itertools::Itertools as _; +use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods}; +use rustc_data_structures::fx::{FxHashSet, FxIndexMap, FxIndexSet}; +use rustc_hir::def_id::{DefId, LocalDefId}; +use rustc_index::IndexVec; +use rustc_middle::bug; +use rustc_middle::mir; +use rustc_middle::ty::{self, TyCtxt}; +use rustc_span::def_id::DefIdSet; +use rustc_span::Symbol; + +/// Generates and exports the Coverage Map. +/// +/// Rust Coverage Map generation supports LLVM Coverage Mapping Format versions +/// 6 and 7 (encoded as 5 and 6 respectively), as described at +/// [LLVM Code Coverage Mapping Format](https://github.com/rust-lang/llvm-project/blob/rustc/18.0-2024-02-13/llvm/docs/CoverageMappingFormat.rst). +/// These versions are supported by the LLVM coverage tools (`llvm-profdata` and `llvm-cov`) +/// distributed in the `llvm-tools-preview` rustup component. +/// +/// Consequently, Rust's bundled version of Clang also generates Coverage Maps compliant with +/// the same version. Clang's implementation of Coverage Map generation was referenced when +/// implementing this Rust version, and though the format documentation is very explicit and +/// detailed, some undocumented details in Clang's implementation (that may or may not be important) +/// were also replicated for Rust's Coverage Map. +pub fn finalize(cx: &CodegenCx<'_, '_>) { + let tcx = cx.tcx; + + // Ensure that LLVM is using a version of the coverage mapping format that + // agrees with our Rust-side code. Expected versions (encoded as n-1) are: + // - `CovMapVersion::Version6` (5) used by LLVM 13-17 + // - `CovMapVersion::Version7` (6) used by LLVM 18 + let covmap_version = { + let llvm_covmap_version = coverageinfo::mapping_version(); + let expected_versions = 5..=6; + assert!( + expected_versions.contains(&llvm_covmap_version), + "Coverage mapping version exposed by `llvm-wrapper` is out of sync; \ + expected {expected_versions:?} but was {llvm_covmap_version}" + ); + // This is the version number that we will embed in the covmap section: + llvm_covmap_version + }; + + debug!("Generating coverage map for CodegenUnit: `{}`", cx.codegen_unit.name()); + + // In order to show that unused functions have coverage counts of zero (0), LLVM requires the + // functions exist. Generate synthetic functions with a (required) single counter, and add the + // MIR `Coverage` code regions to the `function_coverage_map`, before calling + // `ctx.take_function_coverage_map()`. + if cx.codegen_unit.is_code_coverage_dead_code_cgu() { + add_unused_functions(cx); + } + + let function_coverage_map = match cx.coverage_context() { + Some(ctx) => ctx.take_function_coverage_map(), + None => return, + }; + + if function_coverage_map.is_empty() { + // This module has no functions with coverage instrumentation + return; + } + + let function_coverage_entries = function_coverage_map + .into_iter() + .map(|(instance, function_coverage)| (instance, function_coverage.into_finished())) + .collect::<Vec<_>>(); + + let all_file_names = + function_coverage_entries.iter().flat_map(|(_, fn_cov)| fn_cov.all_file_names()); + let global_file_table = GlobalFileTable::new(all_file_names); + + // Encode all filenames referenced by coverage mappings in this CGU. + let filenames_buffer = global_file_table.make_filenames_buffer(tcx); + + let filenames_size = filenames_buffer.len(); + let filenames_val = cx.const_bytes(&filenames_buffer); + let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer); + + // Generate the coverage map header, which contains the filenames used by + // this CGU's coverage mappings, and store it in a well-known global. + let cov_data_val = generate_coverage_map(cx, covmap_version, filenames_size, filenames_val); + coverageinfo::save_cov_data_to_mod(cx, cov_data_val); + + let mut unused_function_names = Vec::new(); + let covfun_section_name = coverageinfo::covfun_section_name(cx); + + // Encode coverage mappings and generate function records + for (instance, function_coverage) in function_coverage_entries { + debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance); + + let mangled_function_name = tcx.symbol_name(instance).name; + let source_hash = function_coverage.source_hash(); + let is_used = function_coverage.is_used(); + + let coverage_mapping_buffer = + encode_mappings_for_function(&global_file_table, &function_coverage); + + if coverage_mapping_buffer.is_empty() { + if function_coverage.is_used() { + bug!( + "A used function should have had coverage mapping data but did not: {}", + mangled_function_name + ); + } else { + debug!("unused function had no coverage mapping data: {}", mangled_function_name); + continue; + } + } + + if !is_used { + unused_function_names.push(mangled_function_name); + } + + save_function_record( + cx, + &covfun_section_name, + mangled_function_name, + source_hash, + filenames_ref, + coverage_mapping_buffer, + is_used, + ); + } + + // For unused functions, we need to take their mangled names and store them + // in a specially-named global array. LLVM's `InstrProfiling` pass will + // detect this global and include those names in its `__llvm_prf_names` + // section. (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.) + if !unused_function_names.is_empty() { + assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); + + let name_globals = unused_function_names + .into_iter() + .map(|mangled_function_name| cx.const_str(mangled_function_name).0) + .collect::<Vec<_>>(); + let initializer = cx.const_array(cx.type_ptr(), &name_globals); + + let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), "__llvm_coverage_names"); + llvm::set_global_constant(array, true); + llvm::set_linkage(array, llvm::Linkage::InternalLinkage); + llvm::set_initializer(array, initializer); + } +} + +/// Maps "global" (per-CGU) file ID numbers to their underlying filenames. +struct GlobalFileTable { + /// This "raw" table doesn't include the working dir, so a filename's + /// global ID is its index in this set **plus one**. + raw_file_table: FxIndexSet<Symbol>, +} + +impl GlobalFileTable { + fn new(all_file_names: impl IntoIterator<Item = Symbol>) -> Self { + // Collect all of the filenames into a set. Filenames usually come in + // contiguous runs, so we can dedup adjacent ones to save work. + let mut raw_file_table = all_file_names.into_iter().dedup().collect::<FxIndexSet<Symbol>>(); + + // Sort the file table by its actual string values, not the arbitrary + // ordering of its symbols. + raw_file_table.sort_unstable_by(|a, b| a.as_str().cmp(b.as_str())); + + Self { raw_file_table } + } + + fn global_file_id_for_file_name(&self, file_name: Symbol) -> u32 { + let raw_id = self.raw_file_table.get_index_of(&file_name).unwrap_or_else(|| { + bug!("file name not found in prepared global file table: {file_name}"); + }); + // The raw file table doesn't include an entry for the working dir + // (which has ID 0), so add 1 to get the correct ID. + (raw_id + 1) as u32 + } + + fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec<u8> { + // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5) + // requires setting the first filename to the compilation directory. + // Since rustc generates coverage maps with relative paths, the + // compilation directory can be combined with the relative paths + // to get absolute paths, if needed. + use rustc_session::config::RemapPathScopeComponents; + use rustc_session::RemapFileNameExt; + let working_dir: &str = &tcx + .sess + .opts + .working_dir + .for_scope(tcx.sess, RemapPathScopeComponents::MACRO) + .to_string_lossy(); + + llvm::build_byte_buffer(|buffer| { + coverageinfo::write_filenames_section_to_buffer( + // Insert the working dir at index 0, before the other filenames. + std::iter::once(working_dir).chain(self.raw_file_table.iter().map(Symbol::as_str)), + buffer, + ); + }) + } +} + +rustc_index::newtype_index! { + struct LocalFileId {} +} + +/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU) +/// file IDs. +#[derive(Default)] +struct VirtualFileMapping { + local_to_global: IndexVec<LocalFileId, u32>, + global_to_local: FxIndexMap<u32, LocalFileId>, +} + +impl VirtualFileMapping { + fn local_id_for_global(&mut self, global_file_id: u32) -> LocalFileId { + *self + .global_to_local + .entry(global_file_id) + .or_insert_with(|| self.local_to_global.push(global_file_id)) + } + + fn into_vec(self) -> Vec<u32> { + self.local_to_global.raw + } +} + +/// Using the expressions and counter regions collected for a single function, +/// generate the variable-sized payload of its corresponding `__llvm_covfun` +/// entry. The payload is returned as a vector of bytes. +/// +/// Newly-encountered filenames will be added to the global file table. +fn encode_mappings_for_function( + global_file_table: &GlobalFileTable, + function_coverage: &FunctionCoverage<'_>, +) -> Vec<u8> { + let counter_regions = function_coverage.counter_regions(); + if counter_regions.is_empty() { + return Vec::new(); + } + + let expressions = function_coverage.counter_expressions().collect::<Vec<_>>(); + + let mut virtual_file_mapping = VirtualFileMapping::default(); + let mut mapping_regions = Vec::with_capacity(counter_regions.len()); + + // Group mappings into runs with the same filename, preserving the order + // yielded by `FunctionCoverage`. + // Prepare file IDs for each filename, and prepare the mapping data so that + // we can pass it through FFI to LLVM. + for (file_name, counter_regions_for_file) in + &counter_regions.group_by(|(_, region)| region.file_name) + { + // Look up the global file ID for this filename. + let global_file_id = global_file_table.global_file_id_for_file_name(file_name); + + // Associate that global file ID with a local file ID for this function. + let local_file_id = virtual_file_mapping.local_id_for_global(global_file_id); + debug!(" file id: {local_file_id:?} => global {global_file_id} = '{file_name:?}'"); + + // For each counter/region pair in this function+file, convert it to a + // form suitable for FFI. + for (mapping_kind, region) in counter_regions_for_file { + debug!("Adding counter {mapping_kind:?} to map for {region:?}"); + mapping_regions.push(CounterMappingRegion::from_mapping( + &mapping_kind, + local_file_id.as_u32(), + region, + )); + } + } + + // Encode the function's coverage mappings into a buffer. + llvm::build_byte_buffer(|buffer| { + coverageinfo::write_mapping_to_buffer( + virtual_file_mapping.into_vec(), + expressions, + mapping_regions, + buffer, + ); + }) +} + +/// Construct coverage map header and the array of function records, and combine them into the +/// coverage map. Save the coverage map data into the LLVM IR as a static global using a +/// specific, well-known section and name. +fn generate_coverage_map<'ll>( + cx: &CodegenCx<'ll, '_>, + version: u32, + filenames_size: usize, + filenames_val: &'ll llvm::Value, +) -> &'ll llvm::Value { + debug!("cov map: filenames_size = {}, 0-based version = {}", filenames_size, version); + + // Create the coverage data header (Note, fields 0 and 2 are now always zero, + // as of `llvm::coverage::CovMapVersion::Version4`.) + let zero_was_n_records_val = cx.const_u32(0); + let filenames_size_val = cx.const_u32(filenames_size as u32); + let zero_was_coverage_size_val = cx.const_u32(0); + let version_val = cx.const_u32(version); + let cov_data_header_val = cx.const_struct( + &[zero_was_n_records_val, filenames_size_val, zero_was_coverage_size_val, version_val], + /*packed=*/ false, + ); + + // Create the complete LLVM coverage data value to add to the LLVM IR + cx.const_struct(&[cov_data_header_val, filenames_val], /*packed=*/ false) +} + +/// Construct a function record and combine it with the function's coverage mapping data. +/// Save the function record into the LLVM IR as a static global using a +/// specific, well-known section and name. +fn save_function_record( + cx: &CodegenCx<'_, '_>, + covfun_section_name: &str, + mangled_function_name: &str, + source_hash: u64, + filenames_ref: u64, + coverage_mapping_buffer: Vec<u8>, + is_used: bool, +) { + // Concatenate the encoded coverage mappings + let coverage_mapping_size = coverage_mapping_buffer.len(); + let coverage_mapping_val = cx.const_bytes(&coverage_mapping_buffer); + + let func_name_hash = coverageinfo::hash_bytes(mangled_function_name.as_bytes()); + let func_name_hash_val = cx.const_u64(func_name_hash); + let coverage_mapping_size_val = cx.const_u32(coverage_mapping_size as u32); + let source_hash_val = cx.const_u64(source_hash); + let filenames_ref_val = cx.const_u64(filenames_ref); + let func_record_val = cx.const_struct( + &[ + func_name_hash_val, + coverage_mapping_size_val, + source_hash_val, + filenames_ref_val, + coverage_mapping_val, + ], + /*packed=*/ true, + ); + + coverageinfo::save_func_record_to_mod( + cx, + covfun_section_name, + func_name_hash, + func_record_val, + is_used, + ); +} + +/// Each CGU will normally only emit coverage metadata for the functions that it actually generates. +/// But since we don't want unused functions to disappear from coverage reports, we also scan for +/// functions that were instrumented but are not participating in codegen. +/// +/// These unused functions don't need to be codegenned, but we do need to add them to the function +/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them. +/// We also end up adding their symbol names to a special global array that LLVM will include in +/// its embedded coverage data. +fn add_unused_functions(cx: &CodegenCx<'_, '_>) { + assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); + + let tcx = cx.tcx; + let usage = prepare_usage_sets(tcx); + + let is_unused_fn = |def_id: LocalDefId| -> bool { + let def_id = def_id.to_def_id(); + + // To be eligible for "unused function" mappings, a definition must: + // - Be function-like + // - Not participate directly in codegen (or have lost all its coverage statements) + // - Not have any coverage statements inlined into codegenned functions + tcx.def_kind(def_id).is_fn_like() + && (!usage.all_mono_items.contains(&def_id) + || usage.missing_own_coverage.contains(&def_id)) + && !usage.used_via_inlining.contains(&def_id) + }; + + // Scan for unused functions that were instrumented for coverage. + for def_id in tcx.mir_keys(()).iter().copied().filter(|&def_id| is_unused_fn(def_id)) { + // Get the coverage info from MIR, skipping functions that were never instrumented. + let body = tcx.optimized_mir(def_id); + let Some(function_coverage_info) = body.function_coverage_info.as_deref() else { continue }; + + // FIXME(79651): Consider trying to filter out dummy instantiations of + // unused generic functions from library crates, because they can produce + // "unused instantiation" in coverage reports even when they are actually + // used by some downstream crate in the same binary. + + debug!("generating unused fn: {def_id:?}"); + add_unused_function_coverage(cx, def_id, function_coverage_info); + } +} + +struct UsageSets<'tcx> { + all_mono_items: &'tcx DefIdSet, + used_via_inlining: FxHashSet<DefId>, + missing_own_coverage: FxHashSet<DefId>, +} + +/// Prepare sets of definitions that are relevant to deciding whether something +/// is an "unused function" for coverage purposes. +fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> { + let (all_mono_items, cgus) = tcx.collect_and_partition_mono_items(()); + + // Obtain a MIR body for each function participating in codegen, via an + // arbitrary instance. + let mut def_ids_seen = FxHashSet::default(); + let def_and_mir_for_all_mono_fns = cgus + .iter() + .flat_map(|cgu| cgu.items().keys()) + .filter_map(|item| match item { + mir::mono::MonoItem::Fn(instance) => Some(instance), + mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None, + }) + // We only need one arbitrary instance per definition. + .filter(move |instance| def_ids_seen.insert(instance.def_id())) + .map(|instance| { + // We don't care about the instance, just its underlying MIR. + let body = tcx.instance_mir(instance.def); + (instance.def_id(), body) + }); + + // Functions whose coverage statments were found inlined into other functions. + let mut used_via_inlining = FxHashSet::default(); + // Functions that were instrumented, but had all of their coverage statements + // removed by later MIR transforms (e.g. UnreachablePropagation). + let mut missing_own_coverage = FxHashSet::default(); + + for (def_id, body) in def_and_mir_for_all_mono_fns { + let mut saw_own_coverage = false; + + // Inspect every coverage statement in the function's MIR. + for stmt in body + .basic_blocks + .iter() + .flat_map(|block| &block.statements) + .filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_))) + { + if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) { + // This coverage statement was inlined from another function. + used_via_inlining.insert(inlined.def_id()); + } else { + // Non-inlined coverage statements belong to the enclosing function. + saw_own_coverage = true; + } + } + + if !saw_own_coverage && body.function_coverage_info.is_some() { + missing_own_coverage.insert(def_id); + } + } + + UsageSets { all_mono_items, used_via_inlining, missing_own_coverage } +} + +fn add_unused_function_coverage<'tcx>( + cx: &CodegenCx<'_, 'tcx>, + def_id: LocalDefId, + function_coverage_info: &'tcx mir::coverage::FunctionCoverageInfo, +) { + let tcx = cx.tcx; + let def_id = def_id.to_def_id(); + + // Make a dummy instance that fills in all generics with placeholders. + let instance = ty::Instance::new( + def_id, + ty::GenericArgs::for_item(tcx, def_id, |param, _| { + if let ty::GenericParamDefKind::Lifetime = param.kind { + tcx.lifetimes.re_erased.into() + } else { + tcx.mk_param_from_def(param) + } + }), + ); + + // An unused function's mappings will automatically be rewritten to map to + // zero, because none of its counters/expressions are marked as seen. + let function_coverage = FunctionCoverageCollector::unused(instance, function_coverage_info); + + if let Some(coverage_context) = cx.coverage_context() { + coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage); + } else { + bug!("Could not get the `coverage_context`"); + } +} diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs new file mode 100644 index 00000000000..26ea95f0f0d --- /dev/null +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs @@ -0,0 +1,365 @@ +use crate::llvm; + +use crate::builder::Builder; +use crate::common::CodegenCx; +use crate::coverageinfo::ffi::{CounterExpression, CounterMappingRegion}; +use crate::coverageinfo::map_data::FunctionCoverageCollector; + +use libc::c_uint; +use rustc_codegen_ssa::traits::{ + BaseTypeMethods, BuilderMethods, ConstMethods, CoverageInfoBuilderMethods, MiscMethods, + StaticMethods, +}; +use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; +use rustc_llvm::RustString; +use rustc_middle::bug; +use rustc_middle::mir::coverage::CoverageKind; +use rustc_middle::ty::layout::HasTyCtxt; +use rustc_middle::ty::Instance; +use rustc_target::abi::{Align, Size}; + +use std::cell::RefCell; + +pub(crate) mod ffi; +pub(crate) mod map_data; +pub mod mapgen; + +/// A context object for maintaining all state needed by the coverageinfo module. +pub struct CrateCoverageContext<'ll, 'tcx> { + /// Coverage data for each instrumented function identified by DefId. + pub(crate) function_coverage_map: + RefCell<FxIndexMap<Instance<'tcx>, FunctionCoverageCollector<'tcx>>>, + pub(crate) pgo_func_name_var_map: RefCell<FxHashMap<Instance<'tcx>, &'ll llvm::Value>>, + pub(crate) mcdc_condition_bitmap_map: RefCell<FxHashMap<Instance<'tcx>, Vec<&'ll llvm::Value>>>, +} + +impl<'ll, 'tcx> CrateCoverageContext<'ll, 'tcx> { + pub fn new() -> Self { + Self { + function_coverage_map: Default::default(), + pgo_func_name_var_map: Default::default(), + mcdc_condition_bitmap_map: Default::default(), + } + } + + pub fn take_function_coverage_map( + &self, + ) -> FxIndexMap<Instance<'tcx>, FunctionCoverageCollector<'tcx>> { + self.function_coverage_map.replace(FxIndexMap::default()) + } + + /// LLVM use a temp value to record evaluated mcdc test vector of each decision, which is called condition bitmap. + /// In order to handle nested decisions, several condition bitmaps can be + /// allocated for a function body. + /// These values are named `mcdc.addr.{i}` and are a 32-bit integers. + /// They respectively hold the condition bitmaps for decisions with a depth of `i`. + fn try_get_mcdc_condition_bitmap( + &self, + instance: &Instance<'tcx>, + decision_depth: u16, + ) -> Option<&'ll llvm::Value> { + self.mcdc_condition_bitmap_map + .borrow() + .get(instance) + .and_then(|bitmap_map| bitmap_map.get(decision_depth as usize)) + .copied() // Dereference Option<&&Value> to Option<&Value> + } +} + +// These methods used to be part of trait `CoverageInfoMethods`, which no longer +// exists after most coverage code was moved out of SSA. +impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { + pub(crate) fn coverageinfo_finalize(&self) { + mapgen::finalize(self) + } + + /// For LLVM codegen, returns a function-specific `Value` for a global + /// string, to hold the function name passed to LLVM intrinsic + /// `instrprof.increment()`. The `Value` is only created once per instance. + /// Multiple invocations with the same instance return the same `Value`. + fn get_pgo_func_name_var(&self, instance: Instance<'tcx>) -> &'ll llvm::Value { + if let Some(coverage_context) = self.coverage_context() { + debug!("getting pgo_func_name_var for instance={:?}", instance); + let mut pgo_func_name_var_map = coverage_context.pgo_func_name_var_map.borrow_mut(); + pgo_func_name_var_map + .entry(instance) + .or_insert_with(|| create_pgo_func_name_var(self, instance)) + } else { + bug!("Could not get the `coverage_context`"); + } + } +} + +impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { + fn init_coverage(&mut self, instance: Instance<'tcx>) { + let Some(function_coverage_info) = + self.tcx.instance_mir(instance.def).function_coverage_info.as_deref() + else { + return; + }; + + // If there are no MC/DC bitmaps to set up, return immediately. + if function_coverage_info.mcdc_bitmap_bytes == 0 { + return; + } + + let fn_name = self.get_pgo_func_name_var(instance); + let hash = self.const_u64(function_coverage_info.function_source_hash); + let bitmap_bytes = self.const_u32(function_coverage_info.mcdc_bitmap_bytes); + self.mcdc_parameters(fn_name, hash, bitmap_bytes); + + // Create pointers named `mcdc.addr.{i}` to stack-allocated condition bitmaps. + let mut cond_bitmaps = vec![]; + for i in 0..function_coverage_info.mcdc_num_condition_bitmaps { + // MC/DC intrinsics will perform loads/stores that use the ABI default + // alignment for i32, so our variable declaration should match. + let align = self.tcx.data_layout.i32_align.abi; + let cond_bitmap = self.alloca(Size::from_bytes(4), align); + llvm::set_value_name(cond_bitmap, format!("mcdc.addr.{i}").as_bytes()); + self.store(self.const_i32(0), cond_bitmap, align); + cond_bitmaps.push(cond_bitmap); + } + + self.coverage_context() + .expect("always present when coverage is enabled") + .mcdc_condition_bitmap_map + .borrow_mut() + .insert(instance, cond_bitmaps); + } + + #[instrument(level = "debug", skip(self))] + fn add_coverage(&mut self, instance: Instance<'tcx>, kind: &CoverageKind) { + // Our caller should have already taken care of inlining subtleties, + // so we can assume that counter/expression IDs in this coverage + // statement are meaningful for the given instance. + // + // (Either the statement was not inlined and directly belongs to this + // instance, or it was inlined *from* this instance.) + + let bx = self; + + let Some(function_coverage_info) = + bx.tcx.instance_mir(instance.def).function_coverage_info.as_deref() + else { + debug!("function has a coverage statement but no coverage info"); + return; + }; + + let Some(coverage_context) = bx.coverage_context() else { return }; + let mut coverage_map = coverage_context.function_coverage_map.borrow_mut(); + let func_coverage = coverage_map + .entry(instance) + .or_insert_with(|| FunctionCoverageCollector::new(instance, function_coverage_info)); + + match *kind { + CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!( + "marker statement {kind:?} should have been removed by CleanupPostBorrowck" + ), + CoverageKind::CounterIncrement { id } => { + func_coverage.mark_counter_id_seen(id); + // We need to explicitly drop the `RefMut` before calling into `instrprof_increment`, + // as that needs an exclusive borrow. + drop(coverage_map); + + // The number of counters passed to `llvm.instrprof.increment` might + // be smaller than the number originally inserted by the instrumentor, + // if some high-numbered counters were removed by MIR optimizations. + // If so, LLVM's profiler runtime will use fewer physical counters. + let num_counters = + bx.tcx().coverage_ids_info(instance.def).max_counter_id.as_u32() + 1; + assert!( + num_counters as usize <= function_coverage_info.num_counters, + "num_counters disagreement: query says {num_counters} but function info only has {}", + function_coverage_info.num_counters + ); + + let fn_name = bx.get_pgo_func_name_var(instance); + let hash = bx.const_u64(function_coverage_info.function_source_hash); + let num_counters = bx.const_u32(num_counters); + let index = bx.const_u32(id.as_u32()); + debug!( + "codegen intrinsic instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?}, index={:?})", + fn_name, hash, num_counters, index, + ); + bx.instrprof_increment(fn_name, hash, num_counters, index); + } + CoverageKind::ExpressionUsed { id } => { + func_coverage.mark_expression_id_seen(id); + } + CoverageKind::CondBitmapUpdate { id, value, decision_depth } => { + drop(coverage_map); + assert_ne!( + id.as_u32(), + 0, + "ConditionId of evaluated conditions should never be zero" + ); + let cond_bitmap = coverage_context + .try_get_mcdc_condition_bitmap(&instance, decision_depth) + .expect("mcdc cond bitmap should have been allocated for updating"); + let cond_loc = bx.const_i32(id.as_u32() as i32 - 1); + let bool_value = bx.const_bool(value); + let fn_name = bx.get_pgo_func_name_var(instance); + let hash = bx.const_u64(function_coverage_info.function_source_hash); + bx.mcdc_condbitmap_update(fn_name, hash, cond_loc, cond_bitmap, bool_value); + } + CoverageKind::TestVectorBitmapUpdate { bitmap_idx, decision_depth } => { + drop(coverage_map); + let cond_bitmap = coverage_context + .try_get_mcdc_condition_bitmap(&instance, decision_depth) + .expect("mcdc cond bitmap should have been allocated for merging into the global bitmap"); + let bitmap_bytes = function_coverage_info.mcdc_bitmap_bytes; + assert!(bitmap_idx < bitmap_bytes, "bitmap index of the decision out of range"); + + let fn_name = bx.get_pgo_func_name_var(instance); + let hash = bx.const_u64(function_coverage_info.function_source_hash); + let bitmap_bytes = bx.const_u32(bitmap_bytes); + let bitmap_index = bx.const_u32(bitmap_idx); + bx.mcdc_tvbitmap_update(fn_name, hash, bitmap_bytes, bitmap_index, cond_bitmap); + } + } + } +} + +/// Calls llvm::createPGOFuncNameVar() with the given function instance's +/// mangled function name. The LLVM API returns an llvm::GlobalVariable +/// containing the function name, with the specific variable name and linkage +/// required by LLVM InstrProf source-based coverage instrumentation. Use +/// `bx.get_pgo_func_name_var()` to ensure the variable is only created once per +/// `Instance`. +fn create_pgo_func_name_var<'ll, 'tcx>( + cx: &CodegenCx<'ll, 'tcx>, + instance: Instance<'tcx>, +) -> &'ll llvm::Value { + let mangled_fn_name: &str = cx.tcx.symbol_name(instance).name; + let llfn = cx.get_fn(instance); + unsafe { + llvm::LLVMRustCoverageCreatePGOFuncNameVar( + llfn, + mangled_fn_name.as_ptr().cast(), + mangled_fn_name.len(), + ) + } +} + +pub(crate) fn write_filenames_section_to_buffer<'a>( + filenames: impl IntoIterator<Item = &'a str>, + buffer: &RustString, +) { + let (pointers, lengths) = filenames + .into_iter() + .map(|s: &str| (s.as_ptr().cast(), s.len())) + .unzip::<_, _, Vec<_>, Vec<_>>(); + + unsafe { + llvm::LLVMRustCoverageWriteFilenamesSectionToBuffer( + pointers.as_ptr(), + pointers.len(), + lengths.as_ptr(), + lengths.len(), + buffer, + ); + } +} + +pub(crate) fn write_mapping_to_buffer( + virtual_file_mapping: Vec<u32>, + expressions: Vec<CounterExpression>, + mapping_regions: Vec<CounterMappingRegion>, + buffer: &RustString, +) { + unsafe { + llvm::LLVMRustCoverageWriteMappingToBuffer( + virtual_file_mapping.as_ptr(), + virtual_file_mapping.len() as c_uint, + expressions.as_ptr(), + expressions.len() as c_uint, + mapping_regions.as_ptr(), + mapping_regions.len() as c_uint, + buffer, + ); + } +} + +pub(crate) fn hash_bytes(bytes: &[u8]) -> u64 { + unsafe { llvm::LLVMRustCoverageHashByteArray(bytes.as_ptr().cast(), bytes.len()) } +} + +pub(crate) fn mapping_version() -> u32 { + unsafe { llvm::LLVMRustCoverageMappingVersion() } +} + +pub(crate) fn save_cov_data_to_mod<'ll, 'tcx>( + cx: &CodegenCx<'ll, 'tcx>, + cov_data_val: &'ll llvm::Value, +) { + let covmap_var_name = llvm::build_string(|s| unsafe { + llvm::LLVMRustCoverageWriteMappingVarNameToString(s); + }) + .expect("Rust Coverage Mapping var name failed UTF-8 conversion"); + debug!("covmap var name: {:?}", covmap_var_name); + + let covmap_section_name = llvm::build_string(|s| unsafe { + llvm::LLVMRustCoverageWriteMapSectionNameToString(cx.llmod, s); + }) + .expect("Rust Coverage section name failed UTF-8 conversion"); + debug!("covmap section name: {:?}", covmap_section_name); + + let llglobal = llvm::add_global(cx.llmod, cx.val_ty(cov_data_val), &covmap_var_name); + llvm::set_initializer(llglobal, cov_data_val); + llvm::set_global_constant(llglobal, true); + llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage); + llvm::set_section(llglobal, &covmap_section_name); + // LLVM's coverage mapping format specifies 8-byte alignment for items in this section. + llvm::set_alignment(llglobal, Align::EIGHT); + cx.add_used_global(llglobal); +} + +pub(crate) fn save_func_record_to_mod<'ll, 'tcx>( + cx: &CodegenCx<'ll, 'tcx>, + covfun_section_name: &str, + func_name_hash: u64, + func_record_val: &'ll llvm::Value, + is_used: bool, +) { + // Assign a name to the function record. This is used to merge duplicates. + // + // In LLVM, a "translation unit" (effectively, a `Crate` in Rust) can describe functions that + // are included-but-not-used. If (or when) Rust generates functions that are + // included-but-not-used, note that a dummy description for a function included-but-not-used + // in a Crate can be replaced by full description provided by a different Crate. The two kinds + // of descriptions play distinct roles in LLVM IR; therefore, assign them different names (by + // appending "u" to the end of the function record var name, to prevent `linkonce_odr` merging. + let func_record_var_name = + format!("__covrec_{:X}{}", func_name_hash, if is_used { "u" } else { "" }); + debug!("function record var name: {:?}", func_record_var_name); + debug!("function record section name: {:?}", covfun_section_name); + + let llglobal = llvm::add_global(cx.llmod, cx.val_ty(func_record_val), &func_record_var_name); + llvm::set_initializer(llglobal, func_record_val); + llvm::set_global_constant(llglobal, true); + llvm::set_linkage(llglobal, llvm::Linkage::LinkOnceODRLinkage); + llvm::set_visibility(llglobal, llvm::Visibility::Hidden); + llvm::set_section(llglobal, covfun_section_name); + // LLVM's coverage mapping format specifies 8-byte alignment for items in this section. + llvm::set_alignment(llglobal, Align::EIGHT); + llvm::set_comdat(cx.llmod, llglobal, &func_record_var_name); + cx.add_used_global(llglobal); +} + +/// Returns the section name string to pass through to the linker when embedding +/// per-function coverage information in the object file, according to the target +/// platform's object file format. +/// +/// LLVM's coverage tools read coverage mapping details from this section when +/// producing coverage reports. +/// +/// Typical values are: +/// - `__llvm_covfun` on Linux +/// - `__LLVM_COV,__llvm_covfun` on macOS (includes `__LLVM_COV,` segment prefix) +/// - `.lcovfun$M` on Windows (includes `$M` sorting suffix) +pub(crate) fn covfun_section_name(cx: &CodegenCx<'_, '_>) -> String { + llvm::build_string(|s| unsafe { + llvm::LLVMRustCoverageWriteFuncSectionNameToString(cx.llmod, s); + }) + .expect("Rust Coverage function record section name failed UTF-8 conversion") +} |
