diff options
| author | bors <bors@rust-lang.org> | 2023-10-18 18:48:34 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2023-10-18 18:48:34 +0000 |
| commit | cc705b801236d064260bb67b3a0a25e4747fa7ec (patch) | |
| tree | 8610b8d32e7bdaefa82db491d8e5d7b55b841e1e /compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs | |
| parent | e1aa9edde0f363fbc6ab4a50c85974306e56667e (diff) | |
| parent | 33da0978ac2674d046d3b01a9db33dc7f19339c6 (diff) | |
| download | rust-cc705b801236d064260bb67b3a0a25e4747fa7ec.tar.gz rust-cc705b801236d064260bb67b3a0a25e4747fa7ec.zip | |
Auto merge of #116046 - Zalathar:fn-cov-info, r=cjgillot
coverage: Move most per-function coverage info into `mir::Body` Currently, all of the coverage information collected by the `InstrumentCoverage` pass is smuggled through MIR in the form of individual `StatementKind::Coverage` statements, which must then be reassembled by coverage codegen. That's awkward for a number of reasons: - While some of the coverage statements do care about their specific position in the MIR control-flow graph, many of them don't, and are just tacked onto the function's first BB as metadata carriers. - MIR inlining can result in coverage statements being duplicated, so coverage codegen has to jump through hoops to avoid emitting duplicate mappings. - MIR optimizations that would delete coverage statements need to carefully copy them into the function's first BB so as not to omit them from coverage reports. - The order in which coverage codegen sees coverage statements is dependent on MIR optimizations/inlining, which can cause unnecessary churn in the emitted coverage mappings. - We don't have a good way to annotate MIR-level functions with extra coverage info that doesn't belong in a statement. --- This PR therefore takes most of the per-function coverage info and stores it in a field in `mir::Body` as `Option<Box<FunctionCoverageInfo>>`. (This adds one pointer to the size of `mir::Body`, even when coverage is not enabled.) Coverage statements still need to be injected into MIR in some cases, but only when they actually affect codegen (counters) or are needed to detect code that has been optimized away as unreachable (counters/expressions). --- By the end of this PR, the information stored in `FunctionCoverageInfo` is: - A hash of the function's source code (needed by LLVM's coverage map format) - The number of coverage counters added by coverage instrumentation - A table of coverage expressions, associating each expression ID with its operator (add or subtract) and its two operands - The list of mappings, associating each covered code region with a counter/expression/zero value --- ~~This is built on top of #115301, so I'll rebase and roll a reviewer once that lands.~~ r? `@ghost` `@rustbot` label +A-code-coverage
Diffstat (limited to 'compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs')
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs | 344 |
1 files changed, 153 insertions, 191 deletions
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs index 55f43aa5341..84319b4ba2d 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs @@ -1,64 +1,78 @@ use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind}; use rustc_data_structures::fx::FxIndexSet; -use rustc_index::IndexVec; -use rustc_middle::mir::coverage::{CodeRegion, CounterId, ExpressionId, Op, Operand}; +use rustc_index::bit_set::BitSet; +use rustc_middle::mir::coverage::{ + CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, Op, +}; use rustc_middle::ty::Instance; -use rustc_middle::ty::TyCtxt; -#[derive(Clone, Debug, PartialEq)] -pub struct Expression { - lhs: Operand, - op: Op, - rhs: Operand, - code_regions: Vec<CodeRegion>, -} - -/// Collects all of the coverage regions associated with (a) injected counters, (b) counter -/// expressions (additions or subtraction), and (c) unreachable regions (always counted as zero), -/// for a given Function. This struct also stores the `function_source_hash`, -/// computed during instrumentation, and forwarded with counters. -/// -/// Note, it may be important to understand LLVM's definitions of `unreachable` regions versus "gap -/// regions" (or "gap areas"). A gap region is a code region within a counted region (either counter -/// or expression), but the line or lines in the gap region are not executable (such as lines with -/// only whitespace or comments). According to LLVM Code Coverage Mapping documentation, "A count -/// for a gap area is only used as the line execution count if there are no other regions on a -/// line." +/// Holds all of the coverage mapping data associated with a function instance, +/// collected during traversal of `Coverage` statements in the function's MIR. #[derive(Debug)] pub struct FunctionCoverage<'tcx> { - instance: Instance<'tcx>, - source_hash: u64, + /// Coverage info that was attached to this function by the instrumentor. + function_coverage_info: &'tcx FunctionCoverageInfo, is_used: bool, - counters: IndexVec<CounterId, Option<Vec<CodeRegion>>>, - expressions: IndexVec<ExpressionId, Option<Expression>>, - unreachable_regions: Vec<CodeRegion>, + + /// Tracks which counters have been seen, so that we can identify mappings + /// to counters that were optimized out, and set them to zero. + counters_seen: BitSet<CounterId>, + /// Contains all expression IDs that have been seen in an `ExpressionUsed` + /// coverage statement, plus all expression IDs that aren't directly used + /// by any mappings (and therefore do not have expression-used statements). + /// After MIR traversal is finished, we can conclude that any IDs missing + /// from this set must have had their statements deleted by MIR opts. + expressions_seen: BitSet<ExpressionId>, } impl<'tcx> FunctionCoverage<'tcx> { /// Creates a new set of coverage data for a used (called) function. - pub fn new(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Self { - Self::create(tcx, instance, true) + pub fn new( + instance: Instance<'tcx>, + function_coverage_info: &'tcx FunctionCoverageInfo, + ) -> Self { + Self::create(instance, function_coverage_info, true) } /// Creates a new set of coverage data for an unused (never called) function. - pub fn unused(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Self { - Self::create(tcx, instance, false) + pub fn unused( + instance: Instance<'tcx>, + function_coverage_info: &'tcx FunctionCoverageInfo, + ) -> Self { + Self::create(instance, function_coverage_info, false) } - fn create(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>, is_used: bool) -> Self { - let coverageinfo = tcx.coverageinfo(instance.def); + fn create( + instance: Instance<'tcx>, + function_coverage_info: &'tcx FunctionCoverageInfo, + is_used: bool, + ) -> Self { + let num_counters = function_coverage_info.num_counters; + let num_expressions = function_coverage_info.expressions.len(); debug!( - "FunctionCoverage::create(instance={:?}) has coverageinfo={:?}. is_used={}", - instance, coverageinfo, is_used + "FunctionCoverage::create(instance={instance:?}) has \ + num_counters={num_counters}, num_expressions={num_expressions}, is_used={is_used}" ); + + // Create a filled set of expression IDs, so that expressions not + // directly used by mappings will be treated as "seen". + // (If they end up being unused, LLVM will delete them for us.) + let mut expressions_seen = BitSet::new_filled(num_expressions); + // For each expression ID that is directly used by one or more mappings, + // mark it as not-yet-seen. This indicates that we expect to see a + // corresponding `ExpressionUsed` statement during MIR traversal. + for Mapping { term, .. } in &function_coverage_info.mappings { + if let &CovTerm::Expression(id) = term { + expressions_seen.remove(id); + } + } + Self { - instance, - source_hash: 0, // will be set with the first `add_counter()` + function_coverage_info, is_used, - counters: IndexVec::from_elem_n(None, coverageinfo.num_counters as usize), - expressions: IndexVec::from_elem_n(None, coverageinfo.num_expressions as usize), - unreachable_regions: Vec::new(), + counters_seen: BitSet::new_empty(num_counters), + expressions_seen, } } @@ -67,135 +81,94 @@ impl<'tcx> FunctionCoverage<'tcx> { self.is_used } - /// Sets the function source hash value. If called multiple times for the same function, all - /// calls should have the same hash value. - pub fn set_function_source_hash(&mut self, source_hash: u64) { - if self.source_hash == 0 { - self.source_hash = source_hash; - } else { - debug_assert_eq!(source_hash, self.source_hash); - } - } - - /// Adds code regions to be counted by an injected counter intrinsic. + /// Marks a counter ID as having been seen in a counter-increment statement. #[instrument(level = "debug", skip(self))] - pub(crate) fn add_counter(&mut self, id: CounterId, code_regions: &[CodeRegion]) { - if code_regions.is_empty() { - return; - } - - let slot = &mut self.counters[id]; - match slot { - None => *slot = Some(code_regions.to_owned()), - // If this counter ID slot has already been filled, it should - // contain identical information. - Some(ref previous_regions) => assert_eq!( - previous_regions, code_regions, - "add_counter: code regions for id changed" - ), - } - } - - /// Adds information about a coverage expression, along with zero or more - /// code regions mapped to that expression. - /// - /// Both counters and "counter expressions" (or simply, "expressions") can be operands in other - /// expressions. These are tracked as separate variants of `Operand`, so there is no ambiguity - /// between operands that are counter IDs and operands that are expression IDs. - #[instrument(level = "debug", skip(self))] - pub(crate) fn add_counter_expression( - &mut self, - expression_id: ExpressionId, - lhs: Operand, - op: Op, - rhs: Operand, - code_regions: &[CodeRegion], - ) { - debug_assert!( - expression_id.as_usize() < self.expressions.len(), - "expression_id {} is out of range for expressions.len() = {} - for {:?}", - expression_id.as_usize(), - self.expressions.len(), - self, - ); - - let expression = Expression { lhs, op, rhs, code_regions: code_regions.to_owned() }; - let slot = &mut self.expressions[expression_id]; - match slot { - None => *slot = Some(expression), - // If this expression ID slot has already been filled, it should - // contain identical information. - Some(ref previous_expression) => assert_eq!( - previous_expression, &expression, - "add_counter_expression: expression for id changed" - ), - } + pub(crate) fn mark_counter_id_seen(&mut self, id: CounterId) { + self.counters_seen.insert(id); } - /// Adds regions that will be marked as "unreachable", with a constant "zero counter". + /// Marks an expression ID as having been seen in an expression-used statement. #[instrument(level = "debug", skip(self))] - pub(crate) fn add_unreachable_regions(&mut self, code_regions: &[CodeRegion]) { - assert!(!code_regions.is_empty(), "unreachable regions always have code regions"); - self.unreachable_regions.extend_from_slice(code_regions); + pub(crate) fn mark_expression_id_seen(&mut self, id: ExpressionId) { + self.expressions_seen.insert(id); } - /// Perform some simplifications to make the final coverage mappings - /// slightly smaller. + /// Identify expressions that will always have a value of zero, and note + /// their IDs in [`ZeroExpressions`]. Mappings that refer to a zero expression + /// can instead become mappings to a constant zero value. /// /// This method mainly exists to preserve the simplifications that were /// already being performed by the Rust-side expression renumbering, so that /// the resulting coverage mappings don't get worse. - pub(crate) fn simplify_expressions(&mut self) { + fn identify_zero_expressions(&self) -> ZeroExpressions { // The set of expressions that either were optimized out entirely, or // have zero as both of their operands, and will therefore always have // a value of zero. Other expressions that refer to these as operands - // can have those operands replaced with `Operand::Zero`. + // can have those operands replaced with `CovTerm::Zero`. let mut zero_expressions = FxIndexSet::default(); - // For each expression, perform simplifications based on lower-numbered - // expressions, and then update the set of always-zero expressions if - // necessary. + // Simplify a copy of each expression based on lower-numbered expressions, + // and then update the set of always-zero expressions if necessary. // (By construction, expressions can only refer to other expressions - // that have lower IDs, so one simplification pass is sufficient.) - for (id, maybe_expression) in self.expressions.iter_enumerated_mut() { - let Some(expression) = maybe_expression else { - // If an expression is missing, it must have been optimized away, + // that have lower IDs, so one pass is sufficient.) + for (id, expression) in self.function_coverage_info.expressions.iter_enumerated() { + if !self.expressions_seen.contains(id) { + // If an expression was not seen, it must have been optimized away, // so any operand that refers to it can be replaced with zero. zero_expressions.insert(id); continue; + } + + // We don't need to simplify the actual expression data in the + // expressions list; we can just simplify a temporary copy and then + // use that to update the set of always-zero expressions. + let Expression { mut lhs, op, mut rhs } = *expression; + + // If an expression has an operand that is also an expression, the + // operand's ID must be strictly lower. This is what lets us find + // all zero expressions in one pass. + let assert_operand_expression_is_lower = |operand_id: ExpressionId| { + assert!( + operand_id < id, + "Operand {operand_id:?} should be less than {id:?} in {expression:?}", + ) }; // If an operand refers to an expression that is always zero, then - // that operand can be replaced with `Operand::Zero`. - let maybe_set_operand_to_zero = |operand: &mut Operand| match &*operand { - Operand::Expression(id) if zero_expressions.contains(id) => { - *operand = Operand::Zero; + // that operand can be replaced with `CovTerm::Zero`. + let maybe_set_operand_to_zero = |operand: &mut CovTerm| match *operand { + CovTerm::Expression(id) => { + assert_operand_expression_is_lower(id); + if zero_expressions.contains(&id) { + *operand = CovTerm::Zero; + } } _ => (), }; - maybe_set_operand_to_zero(&mut expression.lhs); - maybe_set_operand_to_zero(&mut expression.rhs); + maybe_set_operand_to_zero(&mut lhs); + maybe_set_operand_to_zero(&mut rhs); // Coverage counter values cannot be negative, so if an expression // involves subtraction from zero, assume that its RHS must also be zero. // (Do this after simplifications that could set the LHS to zero.) - if let Expression { lhs: Operand::Zero, op: Op::Subtract, .. } = expression { - expression.rhs = Operand::Zero; + if lhs == CovTerm::Zero && op == Op::Subtract { + rhs = CovTerm::Zero; } // After the above simplifications, if both operands are zero, then // we know that this expression is always zero too. - if let Expression { lhs: Operand::Zero, rhs: Operand::Zero, .. } = expression { + if lhs == CovTerm::Zero && rhs == CovTerm::Zero { zero_expressions.insert(id); } } + + ZeroExpressions(zero_expressions) } /// Return the source hash, generated from the HIR node structure, and used to indicate whether /// or not the source code structure changed between different compilations. pub fn source_hash(&self) -> u64 { - self.source_hash + if self.is_used { self.function_coverage_info.function_source_hash } else { 0 } } /// Generate an array of CounterExpressions, and an iterator over all `Counter`s and their @@ -204,91 +177,80 @@ impl<'tcx> FunctionCoverage<'tcx> { pub fn get_expressions_and_counter_regions( &self, ) -> (Vec<CounterExpression>, impl Iterator<Item = (Counter, &CodeRegion)>) { - assert!( - self.source_hash != 0 || !self.is_used, - "No counters provided the source_hash for used function: {:?}", - self.instance - ); + let zero_expressions = self.identify_zero_expressions(); - let counter_expressions = self.counter_expressions(); + let counter_expressions = self.counter_expressions(&zero_expressions); // Expression IDs are indices into `self.expressions`, and on the LLVM // side they will be treated as indices into `counter_expressions`, so // the two vectors should correspond 1:1. - assert_eq!(self.expressions.len(), counter_expressions.len()); + assert_eq!(self.function_coverage_info.expressions.len(), counter_expressions.len()); - let counter_regions = self.counter_regions(); - let expression_regions = self.expression_regions(); - let unreachable_regions = self.unreachable_regions(); + let counter_regions = self.counter_regions(zero_expressions); - let counter_regions = - counter_regions.chain(expression_regions.into_iter().chain(unreachable_regions)); (counter_expressions, counter_regions) } - fn counter_regions(&self) -> impl Iterator<Item = (Counter, &CodeRegion)> { - self.counters - .iter_enumerated() - // Filter out counter IDs that we never saw during MIR traversal. - // This can happen if a counter was optimized out by MIR transforms - // (and replaced with `CoverageKind::Unreachable` instead). - .filter_map(|(id, maybe_code_regions)| Some((id, maybe_code_regions.as_ref()?))) - .flat_map(|(id, code_regions)| { - let counter = Counter::counter_value_reference(id); - code_regions.iter().map(move |region| (counter, region)) - }) - } - /// Convert this function's coverage expression data into a form that can be /// passed through FFI to LLVM. - fn counter_expressions(&self) -> Vec<CounterExpression> { + fn counter_expressions(&self, zero_expressions: &ZeroExpressions) -> Vec<CounterExpression> { // We know that LLVM will optimize out any unused expressions before // producing the final coverage map, so there's no need to do the same // thing on the Rust side unless we're confident we can do much better. // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.) - self.expressions + let counter_from_operand = |operand: CovTerm| match operand { + CovTerm::Expression(id) if zero_expressions.contains(id) => Counter::ZERO, + _ => Counter::from_term(operand), + }; + + self.function_coverage_info + .expressions .iter() - .map(|expression| match expression { - None => { - // This expression ID was allocated, but we never saw the - // actual expression, so it must have been optimized out. - // Replace it with a dummy expression, and let LLVM take - // care of omitting it from the expression list. - CounterExpression::DUMMY - } - &Some(Expression { lhs, op, rhs, .. }) => { - // Convert the operands and operator as normal. - CounterExpression::new( - Counter::from_operand(lhs), - match op { - Op::Add => ExprKind::Add, - Op::Subtract => ExprKind::Subtract, - }, - Counter::from_operand(rhs), - ) - } + .map(|&Expression { lhs, op, rhs }| CounterExpression { + lhs: counter_from_operand(lhs), + kind: match op { + Op::Add => ExprKind::Add, + Op::Subtract => ExprKind::Subtract, + }, + rhs: counter_from_operand(rhs), }) .collect::<Vec<_>>() } - fn expression_regions(&self) -> Vec<(Counter, &CodeRegion)> { - // Find all of the expression IDs that weren't optimized out AND have - // one or more attached code regions, and return the corresponding - // mappings as counter/region pairs. - self.expressions - .iter_enumerated() - .filter_map(|(id, maybe_expression)| { - let code_regions = &maybe_expression.as_ref()?.code_regions; - Some((id, code_regions)) - }) - .flat_map(|(id, code_regions)| { - let counter = Counter::expression(id); - code_regions.iter().map(move |code_region| (counter, code_region)) - }) - .collect::<Vec<_>>() + /// Converts this function's coverage mappings into an intermediate form + /// that will be used by `mapgen` when preparing for FFI. + fn counter_regions( + &self, + zero_expressions: ZeroExpressions, + ) -> impl Iterator<Item = (Counter, &CodeRegion)> { + // Historically, mappings were stored directly in counter/expression + // statements in MIR, and MIR optimizations would sometimes remove them. + // That's mostly no longer true, so now we detect cases where that would + // have happened, and zero out the corresponding mappings here instead. + let counter_for_term = move |term: CovTerm| { + let force_to_zero = match term { + CovTerm::Counter(id) => !self.counters_seen.contains(id), + CovTerm::Expression(id) => zero_expressions.contains(id), + CovTerm::Zero => false, + }; + if force_to_zero { Counter::ZERO } else { Counter::from_term(term) } + }; + + self.function_coverage_info.mappings.iter().map(move |mapping| { + let &Mapping { term, ref code_region } = mapping; + let counter = counter_for_term(term); + (counter, code_region) + }) } +} + +/// Set of expression IDs that are known to always evaluate to zero. +/// Any mapping or expression operand that refers to these expressions can have +/// that reference replaced with a constant zero value. +struct ZeroExpressions(FxIndexSet<ExpressionId>); - fn unreachable_regions(&self) -> impl Iterator<Item = (Counter, &CodeRegion)> { - self.unreachable_regions.iter().map(|region| (Counter::ZERO, region)) +impl ZeroExpressions { + fn contains(&self, id: ExpressionId) -> bool { + self.0.contains(&id) } } |
