diff options
| author | bors <bors@rust-lang.org> | 2023-10-18 18:48:34 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2023-10-18 18:48:34 +0000 |
| commit | cc705b801236d064260bb67b3a0a25e4747fa7ec (patch) | |
| tree | 8610b8d32e7bdaefa82db491d8e5d7b55b841e1e /compiler/rustc_middle/src | |
| parent | e1aa9edde0f363fbc6ab4a50c85974306e56667e (diff) | |
| parent | 33da0978ac2674d046d3b01a9db33dc7f19339c6 (diff) | |
| download | rust-cc705b801236d064260bb67b3a0a25e4747fa7ec.tar.gz rust-cc705b801236d064260bb67b3a0a25e4747fa7ec.zip | |
Auto merge of #116046 - Zalathar:fn-cov-info, r=cjgillot
coverage: Move most per-function coverage info into `mir::Body` Currently, all of the coverage information collected by the `InstrumentCoverage` pass is smuggled through MIR in the form of individual `StatementKind::Coverage` statements, which must then be reassembled by coverage codegen. That's awkward for a number of reasons: - While some of the coverage statements do care about their specific position in the MIR control-flow graph, many of them don't, and are just tacked onto the function's first BB as metadata carriers. - MIR inlining can result in coverage statements being duplicated, so coverage codegen has to jump through hoops to avoid emitting duplicate mappings. - MIR optimizations that would delete coverage statements need to carefully copy them into the function's first BB so as not to omit them from coverage reports. - The order in which coverage codegen sees coverage statements is dependent on MIR optimizations/inlining, which can cause unnecessary churn in the emitted coverage mappings. - We don't have a good way to annotate MIR-level functions with extra coverage info that doesn't belong in a statement. --- This PR therefore takes most of the per-function coverage info and stores it in a field in `mir::Body` as `Option<Box<FunctionCoverageInfo>>`. (This adds one pointer to the size of `mir::Body`, even when coverage is not enabled.) Coverage statements still need to be injected into MIR in some cases, but only when they actually affect codegen (counters) or are needed to detect code that has been optimized away as unreachable (counters/expressions). --- By the end of this PR, the information stored in `FunctionCoverageInfo` is: - A hash of the function's source code (needed by LLVM's coverage map format) - The number of coverage counters added by coverage instrumentation - A table of coverage expressions, associating each expression ID with its operator (add or subtract) and its two operands - The list of mappings, associating each covered code region with a counter/expression/zero value --- ~~This is built on top of #115301, so I'll rebase and roll a reviewer once that lands.~~ r? `@ghost` `@rustbot` label +A-code-coverage
Diffstat (limited to 'compiler/rustc_middle/src')
| -rw-r--r-- | compiler/rustc_middle/src/mir/coverage.rs | 105 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/mod.rs | 10 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/pretty.rs | 29 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/query.rs | 24 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/mir/syntax.rs | 18 | ||||
| -rw-r--r-- | compiler/rustc_middle/src/query/mod.rs | 20 |
6 files changed, 135 insertions, 71 deletions
diff --git a/compiler/rustc_middle/src/mir/coverage.rs b/compiler/rustc_middle/src/mir/coverage.rs index a6d6f6f5df4..08d377a8695 100644 --- a/compiler/rustc_middle/src/mir/coverage.rs +++ b/compiler/rustc_middle/src/mir/coverage.rs @@ -1,5 +1,6 @@ //! Metadata from source code coverage analysis and instrumentation. +use rustc_index::IndexVec; use rustc_macros::HashStable; use rustc_span::Symbol; @@ -8,6 +9,11 @@ use std::fmt::{self, Debug, Formatter}; rustc_index::newtype_index! { /// ID of a coverage counter. Values ascend from 0. /// + /// Before MIR inlining, counter IDs are local to their enclosing function. + /// After MIR inlining, coverage statements may have been inlined into + /// another function, so use the statement's source-scope to find which + /// function/instance its IDs are meaningful for. + /// /// Note that LLVM handles counter IDs as `uint32_t`, so there is no need /// to use a larger representation on the Rust side. #[derive(HashStable)] @@ -23,6 +29,11 @@ impl CounterId { rustc_index::newtype_index! { /// ID of a coverage-counter expression. Values ascend from 0. /// + /// Before MIR inlining, expression IDs are local to their enclosing function. + /// After MIR inlining, coverage statements may have been inlined into + /// another function, so use the statement's source-scope to find which + /// function/instance its IDs are meaningful for. + /// /// Note that LLVM handles expression IDs as `uint32_t`, so there is no need /// to use a larger representation on the Rust side. #[derive(HashStable)] @@ -35,19 +46,21 @@ impl ExpressionId { pub const START: Self = Self::from_u32(0); } -/// Operand of a coverage-counter expression. +/// Enum that can hold a constant zero value, the ID of an physical coverage +/// counter, or the ID of a coverage-counter expression. /// -/// Operands can be a constant zero value, an actual coverage counter, or another -/// expression. Counter/expression operands are referred to by ID. +/// This was originally only used for expression operands (and named `Operand`), +/// but the zero/counter/expression distinction is also useful for representing +/// the value of code/gap mappings, and the true/false arms of branch mappings. #[derive(Copy, Clone, PartialEq, Eq)] #[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)] -pub enum Operand { +pub enum CovTerm { Zero, Counter(CounterId), Expression(ExpressionId), } -impl Debug for Operand { +impl Debug for CovTerm { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { Self::Zero => write!(f, "Zero"), @@ -59,40 +72,31 @@ impl Debug for Operand { #[derive(Clone, PartialEq, TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)] pub enum CoverageKind { - Counter { - function_source_hash: u64, - /// ID of this counter within its enclosing function. - /// Expressions in the same function can refer to it as an operand. - id: CounterId, - }, - Expression { - /// ID of this coverage-counter expression within its enclosing function. - /// Other expressions in the same function can refer to it as an operand. - id: ExpressionId, - lhs: Operand, - op: Op, - rhs: Operand, - }, - Unreachable, + /// Marks the point in MIR control flow represented by a coverage counter. + /// + /// This is eventually lowered to `llvm.instrprof.increment` in LLVM IR. + /// + /// If this statement does not survive MIR optimizations, any mappings that + /// refer to this counter can have those references simplified to zero. + CounterIncrement { id: CounterId }, + + /// Marks the point in MIR control-flow represented by a coverage expression. + /// + /// If this statement does not survive MIR optimizations, any mappings that + /// refer to this expression can have those references simplified to zero. + /// + /// (This is only inserted for expression IDs that are directly used by + /// mappings. Intermediate expressions with no direct mappings are + /// retained/zeroed based on whether they are transitively used.) + ExpressionUsed { id: ExpressionId }, } impl Debug for CoverageKind { fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result { use CoverageKind::*; match self { - Counter { id, .. } => write!(fmt, "Counter({:?})", id.index()), - Expression { id, lhs, op, rhs } => write!( - fmt, - "Expression({:?}) = {:?} {} {:?}", - id.index(), - lhs, - match op { - Op::Add => "+", - Op::Subtract => "-", - }, - rhs, - ), - Unreachable => write!(fmt, "Unreachable"), + CounterIncrement { id } => write!(fmt, "CounterIncrement({:?})", id.index()), + ExpressionUsed { id } => write!(fmt, "ExpressionUsed({:?})", id.index()), } } } @@ -133,3 +137,38 @@ impl Op { matches!(self, Self::Subtract) } } + +#[derive(Clone, Debug)] +#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)] +pub struct Expression { + pub lhs: CovTerm, + pub op: Op, + pub rhs: CovTerm, +} + +#[derive(Clone, Debug)] +#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)] +pub struct Mapping { + pub code_region: CodeRegion, + + /// Indicates whether this mapping uses a counter value, expression value, + /// or zero value. + /// + /// FIXME: When we add support for mapping kinds other than `Code` + /// (e.g. branch regions, expansion regions), replace this with a dedicated + /// mapping-kind enum. + pub term: CovTerm, +} + +/// Stores per-function coverage information attached to a `mir::Body`, +/// to be used in conjunction with the individual coverage statements injected +/// into the function's basic blocks. +#[derive(Clone, Debug)] +#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)] +pub struct FunctionCoverageInfo { + pub function_source_hash: u64, + pub num_counters: usize, + + pub expressions: IndexVec<ExpressionId, Expression>, + pub mappings: Vec<Mapping>, +} diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index f979f736b15..3a5ff4dc91f 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -345,6 +345,14 @@ pub struct Body<'tcx> { pub injection_phase: Option<MirPhase>, pub tainted_by_errors: Option<ErrorGuaranteed>, + + /// Per-function coverage information added by the `InstrumentCoverage` + /// pass, to be used in conjunction with the coverage statements injected + /// into this body's blocks. + /// + /// If `-Cinstrument-coverage` is not active, or if an individual function + /// is not eligible for coverage, then this should always be `None`. + pub function_coverage_info: Option<Box<coverage::FunctionCoverageInfo>>, } impl<'tcx> Body<'tcx> { @@ -392,6 +400,7 @@ impl<'tcx> Body<'tcx> { is_polymorphic: false, injection_phase: None, tainted_by_errors, + function_coverage_info: None, }; body.is_polymorphic = body.has_non_region_param(); body @@ -420,6 +429,7 @@ impl<'tcx> Body<'tcx> { is_polymorphic: false, injection_phase: None, tainted_by_errors: None, + function_coverage_info: None, }; body.is_polymorphic = body.has_non_region_param(); body diff --git a/compiler/rustc_middle/src/mir/pretty.rs b/compiler/rustc_middle/src/mir/pretty.rs index c5e3ee575e1..3b3b61e4e21 100644 --- a/compiler/rustc_middle/src/mir/pretty.rs +++ b/compiler/rustc_middle/src/mir/pretty.rs @@ -493,6 +493,27 @@ pub fn write_mir_intro<'tcx>( // Add an empty line before the first block is printed. writeln!(w)?; + if let Some(function_coverage_info) = &body.function_coverage_info { + write_function_coverage_info(function_coverage_info, w)?; + } + + Ok(()) +} + +fn write_function_coverage_info( + function_coverage_info: &coverage::FunctionCoverageInfo, + w: &mut dyn io::Write, +) -> io::Result<()> { + let coverage::FunctionCoverageInfo { expressions, mappings, .. } = function_coverage_info; + + for (id, expression) in expressions.iter_enumerated() { + writeln!(w, "{INDENT}coverage {id:?} => {expression:?};")?; + } + for coverage::Mapping { term, code_region } in mappings { + writeln!(w, "{INDENT}coverage {term:?} => {code_region:?};")?; + } + writeln!(w)?; + Ok(()) } @@ -685,13 +706,7 @@ impl Debug for Statement<'_> { AscribeUserType(box (ref place, ref c_ty), ref variance) => { write!(fmt, "AscribeUserType({place:?}, {variance:?}, {c_ty:?})") } - Coverage(box mir::Coverage { ref kind, ref code_regions }) => { - if code_regions.is_empty() { - write!(fmt, "Coverage::{kind:?}") - } else { - write!(fmt, "Coverage::{kind:?} for {code_regions:?}") - } - } + Coverage(box mir::Coverage { ref kind }) => write!(fmt, "Coverage::{kind:?}"), Intrinsic(box ref intrinsic) => write!(fmt, "{intrinsic}"), ConstEvalCounter => write!(fmt, "ConstEvalCounter"), Nop => write!(fmt, "nop"), diff --git a/compiler/rustc_middle/src/mir/query.rs b/compiler/rustc_middle/src/mir/query.rs index c74a9536b63..f407dc4d7ae 100644 --- a/compiler/rustc_middle/src/mir/query.rs +++ b/compiler/rustc_middle/src/mir/query.rs @@ -1,5 +1,6 @@ //! Values computed by queries that use MIR. +use crate::mir; use crate::ty::{self, OpaqueHiddenType, Ty, TyCtxt}; use rustc_data_structures::fx::FxIndexMap; use rustc_data_structures::unord::UnordSet; @@ -445,14 +446,19 @@ pub struct DestructuredConstant<'tcx> { pub fields: &'tcx [(ConstValue<'tcx>, Ty<'tcx>)], } -/// Coverage information summarized from a MIR if instrumented for source code coverage (see -/// compiler option `-Cinstrument-coverage`). This information is generated by the -/// `InstrumentCoverage` MIR pass and can be retrieved via the `coverageinfo` query. +/// Summarizes coverage IDs inserted by the `InstrumentCoverage` MIR pass +/// (for compiler option `-Cinstrument-coverage`), after MIR optimizations +/// have had a chance to potentially remove some of them. +/// +/// Used by the `coverage_ids_info` query. #[derive(Clone, TyEncodable, TyDecodable, Debug, HashStable)] -pub struct CoverageInfo { - /// The total number of coverage region counters added to the MIR `Body`. - pub num_counters: u32, - - /// The total number of coverage region counter expressions added to the MIR `Body`. - pub num_expressions: u32, +pub struct CoverageIdsInfo { + /// Coverage codegen needs to know the highest counter ID that is ever + /// incremented within a function, so that it can set the `num-counters` + /// argument of the `llvm.instrprof.increment` intrinsic. + /// + /// This may be less than the highest counter ID emitted by the + /// InstrumentCoverage MIR pass, if the highest-numbered counter increments + /// were removed by MIR optimizations. + pub max_counter_id: mir::coverage::CounterId, } diff --git a/compiler/rustc_middle/src/mir/syntax.rs b/compiler/rustc_middle/src/mir/syntax.rs index c7d99648f1e..7a645fb5d62 100644 --- a/compiler/rustc_middle/src/mir/syntax.rs +++ b/compiler/rustc_middle/src/mir/syntax.rs @@ -5,7 +5,7 @@ use super::{BasicBlock, Const, Local, UserTypeProjection}; -use crate::mir::coverage::{CodeRegion, CoverageKind}; +use crate::mir::coverage::CoverageKind; use crate::traits::Reveal; use crate::ty::adjustment::PointerCoercion; use crate::ty::GenericArgsRef; @@ -361,11 +361,16 @@ pub enum StatementKind<'tcx> { /// Disallowed after drop elaboration. AscribeUserType(Box<(Place<'tcx>, UserTypeProjection)>, ty::Variance), - /// Marks the start of a "coverage region", injected with '-Cinstrument-coverage'. A - /// `Coverage` statement carries metadata about the coverage region, used to inject a coverage - /// map into the binary. If `Coverage::kind` is a `Counter`, the statement also generates - /// executable code, to increment a counter variable at runtime, each time the code region is - /// executed. + /// Carries control-flow-sensitive information injected by `-Cinstrument-coverage`, + /// such as where to generate physical coverage-counter-increments during codegen. + /// + /// Coverage statements are used in conjunction with the coverage mappings and other + /// information stored in the function's + /// [`mir::Body::function_coverage_info`](crate::mir::Body::function_coverage_info). + /// (For inlined MIR, take care to look up the *original function's* coverage info.) + /// + /// Interpreters and codegen backends that don't support coverage instrumentation + /// can usually treat this as a no-op. Coverage(Box<Coverage>), /// Denotes a call to an intrinsic that does not require an unwind path and always returns. @@ -514,7 +519,6 @@ pub enum FakeReadCause { #[derive(TypeFoldable, TypeVisitable)] pub struct Coverage { pub kind: CoverageKind, - pub code_regions: Vec<CodeRegion>, } #[derive(Clone, Debug, PartialEq, TyEncodable, TyDecodable, Hash, HashStable)] diff --git a/compiler/rustc_middle/src/query/mod.rs b/compiler/rustc_middle/src/query/mod.rs index 940ab190ffc..afe94d10752 100644 --- a/compiler/rustc_middle/src/query/mod.rs +++ b/compiler/rustc_middle/src/query/mod.rs @@ -573,24 +573,14 @@ rustc_queries! { separate_provide_extern } - /// Returns coverage summary info for a function, after executing the `InstrumentCoverage` - /// MIR pass (assuming the -Cinstrument-coverage option is enabled). - query coverageinfo(key: ty::InstanceDef<'tcx>) -> &'tcx mir::CoverageInfo { - desc { |tcx| "retrieving coverage info from MIR for `{}`", tcx.def_path_str(key.def_id()) } + /// Summarizes coverage IDs inserted by the `InstrumentCoverage` MIR pass + /// (for compiler option `-Cinstrument-coverage`), after MIR optimizations + /// have had a chance to potentially remove some of them. + query coverage_ids_info(key: ty::InstanceDef<'tcx>) -> &'tcx mir::CoverageIdsInfo { + desc { |tcx| "retrieving coverage IDs info from MIR for `{}`", tcx.def_path_str(key.def_id()) } arena_cache } - /// Returns the `CodeRegions` for a function that has instrumented coverage, in case the - /// function was optimized out before codegen, and before being added to the Coverage Map. - query covered_code_regions(key: DefId) -> &'tcx Vec<&'tcx mir::coverage::CodeRegion> { - desc { - |tcx| "retrieving the covered `CodeRegion`s, if instrumented, for `{}`", - tcx.def_path_str(key) - } - arena_cache - cache_on_disk_if { key.is_local() } - } - /// The `DefId` is the `DefId` of the containing MIR body. Promoteds do not have their own /// `DefId`. This function returns all promoteds in the specified body. The body references /// promoteds by the `DefId` and the `mir::Promoted` index. This is necessary, because |
