about summary refs log tree commit diff
diff options
context:
space:
mode:
authorZalathar <Zalathar@users.noreply.github.com>2023-09-13 12:15:40 +1000
committerZalathar <Zalathar@users.noreply.github.com>2023-10-18 21:22:40 +1100
commita18c5f3b751dbfa6c192dc48a6c6f28250e9bc97 (patch)
tree0beb479be41134927ab18e56e3054623a4d4fd6e
parentc479bc7f3b8bb0441bef0634b0a60cfdd1ac0914 (diff)
downloadrust-a18c5f3b751dbfa6c192dc48a6c6f28250e9bc97.tar.gz
rust-a18c5f3b751dbfa6c192dc48a6c6f28250e9bc97.zip
coverage: Store the number of counters/expressions in function coverage info
Coverage codegen can now allocate arrays based on the number of
counters/expressions originally used by the instrumentor.

The existing query that inspects coverage statements is still used for
determining the number of counters passed to `llvm.instrprof.increment`. If
some high-numbered counters were removed by MIR optimizations, the instrumented
binary can potentially use less memory and disk space at runtime.
-rw-r--r--compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs19
-rw-r--r--compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs18
-rw-r--r--compiler/rustc_middle/src/mir/coverage.rs2
-rw-r--r--compiler/rustc_middle/src/mir/query.rs24
-rw-r--r--compiler/rustc_middle/src/query/mod.rs9
-rw-r--r--compiler/rustc_mir_transform/src/coverage/counters.rs8
-rw-r--r--compiler/rustc_mir_transform/src/coverage/mod.rs2
-rw-r--r--compiler/rustc_mir_transform/src/coverage/query.rs89
8 files changed, 69 insertions, 102 deletions
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
index f9d76108302..4dbc0e403fd 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
@@ -6,7 +6,6 @@ use rustc_middle::mir::coverage::{
     CodeRegion, CounterId, ExpressionId, FunctionCoverageInfo, Op, Operand,
 };
 use rustc_middle::ty::Instance;
-use rustc_middle::ty::TyCtxt;
 
 #[derive(Clone, Debug, PartialEq)]
 pub struct Expression {
@@ -40,38 +39,36 @@ pub struct FunctionCoverage<'tcx> {
 impl<'tcx> FunctionCoverage<'tcx> {
     /// Creates a new set of coverage data for a used (called) function.
     pub fn new(
-        tcx: TyCtxt<'tcx>,
         instance: Instance<'tcx>,
         function_coverage_info: &'tcx FunctionCoverageInfo,
     ) -> Self {
-        Self::create(tcx, instance, function_coverage_info, true)
+        Self::create(instance, function_coverage_info, true)
     }
 
     /// Creates a new set of coverage data for an unused (never called) function.
     pub fn unused(
-        tcx: TyCtxt<'tcx>,
         instance: Instance<'tcx>,
         function_coverage_info: &'tcx FunctionCoverageInfo,
     ) -> Self {
-        Self::create(tcx, instance, function_coverage_info, false)
+        Self::create(instance, function_coverage_info, false)
     }
 
     fn create(
-        tcx: TyCtxt<'tcx>,
         instance: Instance<'tcx>,
         function_coverage_info: &'tcx FunctionCoverageInfo,
         is_used: bool,
     ) -> Self {
-        let coverageinfo = tcx.coverageinfo(instance.def);
+        let num_counters = function_coverage_info.num_counters;
+        let num_expressions = function_coverage_info.num_expressions;
         debug!(
-            "FunctionCoverage::create(instance={:?}) has coverageinfo={:?}. is_used={}",
-            instance, coverageinfo, is_used
+            "FunctionCoverage::create(instance={instance:?}) has \
+            num_counters={num_counters}, num_expressions={num_expressions}, is_used={is_used}"
         );
         Self {
             function_coverage_info,
             is_used,
-            counters: IndexVec::from_elem_n(None, coverageinfo.num_counters as usize),
-            expressions: IndexVec::from_elem_n(None, coverageinfo.num_expressions as usize),
+            counters: IndexVec::from_elem_n(None, num_counters),
+            expressions: IndexVec::from_elem_n(None, num_expressions),
             unreachable_regions: Vec::new(),
         }
     }
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
index 0d9d3c7ad41..c975d3432b9 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
@@ -114,7 +114,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
         let mut coverage_map = coverage_context.function_coverage_map.borrow_mut();
         let func_coverage = coverage_map
             .entry(instance)
-            .or_insert_with(|| FunctionCoverage::new(bx.tcx(), instance, function_coverage_info));
+            .or_insert_with(|| FunctionCoverage::new(instance, function_coverage_info));
 
         let Coverage { kind, code_regions } = coverage;
         match *kind {
@@ -124,11 +124,21 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
                 // as that needs an exclusive borrow.
                 drop(coverage_map);
 
-                let coverageinfo = bx.tcx().coverageinfo(instance.def);
+                // The number of counters passed to `llvm.instrprof.increment` might
+                // be smaller than the number originally inserted by the instrumentor,
+                // if some high-numbered counters were removed by MIR optimizations.
+                // If so, LLVM's profiler runtime will use fewer physical counters.
+                let num_counters =
+                    bx.tcx().coverage_ids_info(instance.def).max_counter_id.as_u32() + 1;
+                assert!(
+                    num_counters as usize <= function_coverage_info.num_counters,
+                    "num_counters disagreement: query says {num_counters} but function info only has {}",
+                    function_coverage_info.num_counters
+                );
 
                 let fn_name = bx.get_pgo_func_name_var(instance);
                 let hash = bx.const_u64(function_coverage_info.function_source_hash);
-                let num_counters = bx.const_u32(coverageinfo.num_counters);
+                let num_counters = bx.const_u32(num_counters);
                 let index = bx.const_u32(id.as_u32());
                 debug!(
                     "codegen intrinsic instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?}, index={:?})",
@@ -208,7 +218,7 @@ fn add_unused_function_coverage<'tcx>(
 ) {
     let tcx = cx.tcx;
 
-    let mut function_coverage = FunctionCoverage::unused(tcx, instance, function_coverage_info);
+    let mut function_coverage = FunctionCoverage::unused(instance, function_coverage_info);
     for &code_region in tcx.covered_code_regions(def_id) {
         let code_region = std::slice::from_ref(code_region);
         function_coverage.add_unreachable_regions(code_region);
diff --git a/compiler/rustc_middle/src/mir/coverage.rs b/compiler/rustc_middle/src/mir/coverage.rs
index e1434e71a47..837331a2de8 100644
--- a/compiler/rustc_middle/src/mir/coverage.rs
+++ b/compiler/rustc_middle/src/mir/coverage.rs
@@ -140,4 +140,6 @@ impl Op {
 #[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
 pub struct FunctionCoverageInfo {
     pub function_source_hash: u64,
+    pub num_counters: usize,
+    pub num_expressions: usize,
 }
diff --git a/compiler/rustc_middle/src/mir/query.rs b/compiler/rustc_middle/src/mir/query.rs
index c74a9536b63..f407dc4d7ae 100644
--- a/compiler/rustc_middle/src/mir/query.rs
+++ b/compiler/rustc_middle/src/mir/query.rs
@@ -1,5 +1,6 @@
 //! Values computed by queries that use MIR.
 
+use crate::mir;
 use crate::ty::{self, OpaqueHiddenType, Ty, TyCtxt};
 use rustc_data_structures::fx::FxIndexMap;
 use rustc_data_structures::unord::UnordSet;
@@ -445,14 +446,19 @@ pub struct DestructuredConstant<'tcx> {
     pub fields: &'tcx [(ConstValue<'tcx>, Ty<'tcx>)],
 }
 
-/// Coverage information summarized from a MIR if instrumented for source code coverage (see
-/// compiler option `-Cinstrument-coverage`). This information is generated by the
-/// `InstrumentCoverage` MIR pass and can be retrieved via the `coverageinfo` query.
+/// Summarizes coverage IDs inserted by the `InstrumentCoverage` MIR pass
+/// (for compiler option `-Cinstrument-coverage`), after MIR optimizations
+/// have had a chance to potentially remove some of them.
+///
+/// Used by the `coverage_ids_info` query.
 #[derive(Clone, TyEncodable, TyDecodable, Debug, HashStable)]
-pub struct CoverageInfo {
-    /// The total number of coverage region counters added to the MIR `Body`.
-    pub num_counters: u32,
-
-    /// The total number of coverage region counter expressions added to the MIR `Body`.
-    pub num_expressions: u32,
+pub struct CoverageIdsInfo {
+    /// Coverage codegen needs to know the highest counter ID that is ever
+    /// incremented within a function, so that it can set the `num-counters`
+    /// argument of the `llvm.instrprof.increment` intrinsic.
+    ///
+    /// This may be less than the highest counter ID emitted by the
+    /// InstrumentCoverage MIR pass, if the highest-numbered counter increments
+    /// were removed by MIR optimizations.
+    pub max_counter_id: mir::coverage::CounterId,
 }
diff --git a/compiler/rustc_middle/src/query/mod.rs b/compiler/rustc_middle/src/query/mod.rs
index 940ab190ffc..ed0f73fcf2f 100644
--- a/compiler/rustc_middle/src/query/mod.rs
+++ b/compiler/rustc_middle/src/query/mod.rs
@@ -573,10 +573,11 @@ rustc_queries! {
         separate_provide_extern
     }
 
-    /// Returns coverage summary info for a function, after executing the `InstrumentCoverage`
-    /// MIR pass (assuming the -Cinstrument-coverage option is enabled).
-    query coverageinfo(key: ty::InstanceDef<'tcx>) -> &'tcx mir::CoverageInfo {
-        desc { |tcx| "retrieving coverage info from MIR for `{}`", tcx.def_path_str(key.def_id()) }
+    /// Summarizes coverage IDs inserted by the `InstrumentCoverage` MIR pass
+    /// (for compiler option `-Cinstrument-coverage`), after MIR optimizations
+    /// have had a chance to potentially remove some of them.
+    query coverage_ids_info(key: ty::InstanceDef<'tcx>) -> &'tcx mir::CoverageIdsInfo {
+        desc { |tcx| "retrieving coverage IDs info from MIR for `{}`", tcx.def_path_str(key.def_id()) }
         arena_cache
     }
 
diff --git a/compiler/rustc_mir_transform/src/coverage/counters.rs b/compiler/rustc_mir_transform/src/coverage/counters.rs
index 77e3dee1fef..938c4b2bd3f 100644
--- a/compiler/rustc_mir_transform/src/coverage/counters.rs
+++ b/compiler/rustc_mir_transform/src/coverage/counters.rs
@@ -126,6 +126,14 @@ impl CoverageCounters {
         next
     }
 
+    pub(super) fn num_counters(&self) -> usize {
+        self.next_counter_id.as_usize()
+    }
+
+    pub(super) fn num_expressions(&self) -> usize {
+        self.next_expression_id.as_usize()
+    }
+
     fn set_bcb_counter(
         &mut self,
         bcb: BasicCoverageBlock,
diff --git a/compiler/rustc_mir_transform/src/coverage/mod.rs b/compiler/rustc_mir_transform/src/coverage/mod.rs
index 1ce6ecc3bb5..e1b656bd981 100644
--- a/compiler/rustc_mir_transform/src/coverage/mod.rs
+++ b/compiler/rustc_mir_transform/src/coverage/mod.rs
@@ -214,6 +214,8 @@ impl<'a, 'tcx> Instrumentor<'a, 'tcx> {
 
         self.mir_body.function_coverage_info = Some(Box::new(FunctionCoverageInfo {
             function_source_hash: self.function_source_hash,
+            num_counters: self.coverage_counters.num_counters(),
+            num_expressions: self.coverage_counters.num_expressions(),
         }));
     }
 
diff --git a/compiler/rustc_mir_transform/src/coverage/query.rs b/compiler/rustc_mir_transform/src/coverage/query.rs
index 2c0164e765c..b1f46c8e05c 100644
--- a/compiler/rustc_mir_transform/src/coverage/query.rs
+++ b/compiler/rustc_mir_transform/src/coverage/query.rs
@@ -2,92 +2,33 @@ use super::*;
 
 use rustc_data_structures::captures::Captures;
 use rustc_middle::mir::coverage::*;
-use rustc_middle::mir::{self, Body, Coverage, CoverageInfo};
+use rustc_middle::mir::{self, Body, Coverage, CoverageIdsInfo};
 use rustc_middle::query::Providers;
 use rustc_middle::ty::{self, TyCtxt};
 use rustc_span::def_id::DefId;
 
 /// A `query` provider for retrieving coverage information injected into MIR.
 pub(crate) fn provide(providers: &mut Providers) {
-    providers.coverageinfo = |tcx, def_id| coverageinfo(tcx, def_id);
+    providers.coverage_ids_info = |tcx, def_id| coverage_ids_info(tcx, def_id);
     providers.covered_code_regions = |tcx, def_id| covered_code_regions(tcx, def_id);
 }
 
-/// Coverage codegen needs to know the total number of counter IDs and expression IDs that have
-/// been used by a function's coverage mappings. These totals are used to create vectors to hold
-/// the relevant counter and expression data, and the maximum counter ID (+ 1) is also needed by
-/// the `llvm.instrprof.increment` intrinsic.
-///
-/// MIR optimization may split and duplicate some BasicBlock sequences, or optimize out some code
-/// including injected counters. (It is OK if some counters are optimized out, but those counters
-/// are still included in the total `num_counters` or `num_expressions`.) Simply counting the
-/// calls may not work; but computing the number of counters or expressions by adding `1` to the
-/// highest ID (for a given instrumented function) is valid.
-///
-/// It's possible for a coverage expression to remain in MIR while one or both of its operands
-/// have been optimized away. To avoid problems in codegen, we include those operands' IDs when
-/// determining the maximum counter/expression ID, even if the underlying counter/expression is
-/// no longer present.
-struct CoverageVisitor {
-    max_counter_id: CounterId,
-    max_expression_id: ExpressionId,
-}
-
-impl CoverageVisitor {
-    /// Updates `max_counter_id` to the maximum encountered counter ID.
-    #[inline(always)]
-    fn update_max_counter_id(&mut self, counter_id: CounterId) {
-        self.max_counter_id = self.max_counter_id.max(counter_id);
-    }
-
-    /// Updates `max_expression_id` to the maximum encountered expression ID.
-    #[inline(always)]
-    fn update_max_expression_id(&mut self, expression_id: ExpressionId) {
-        self.max_expression_id = self.max_expression_id.max(expression_id);
-    }
-
-    fn update_from_expression_operand(&mut self, operand: Operand) {
-        match operand {
-            Operand::Counter(id) => self.update_max_counter_id(id),
-            Operand::Expression(id) => self.update_max_expression_id(id),
-            Operand::Zero => {}
-        }
-    }
-
-    fn visit_body(&mut self, body: &Body<'_>) {
-        for coverage in all_coverage_in_mir_body(body) {
-            self.visit_coverage(coverage);
-        }
-    }
-
-    fn visit_coverage(&mut self, coverage: &Coverage) {
-        match coverage.kind {
-            CoverageKind::Counter { id, .. } => self.update_max_counter_id(id),
-            CoverageKind::Expression { id, lhs, rhs, .. } => {
-                self.update_max_expression_id(id);
-                self.update_from_expression_operand(lhs);
-                self.update_from_expression_operand(rhs);
-            }
-            CoverageKind::Unreachable => {}
-        }
-    }
-}
-
-fn coverageinfo<'tcx>(tcx: TyCtxt<'tcx>, instance_def: ty::InstanceDef<'tcx>) -> CoverageInfo {
+/// Query implementation for `coverage_ids_info`.
+fn coverage_ids_info<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    instance_def: ty::InstanceDef<'tcx>,
+) -> CoverageIdsInfo {
     let mir_body = tcx.instance_mir(instance_def);
 
-    let mut coverage_visitor = CoverageVisitor {
-        max_counter_id: CounterId::START,
-        max_expression_id: ExpressionId::START,
-    };
-
-    coverage_visitor.visit_body(mir_body);
+    let max_counter_id = all_coverage_in_mir_body(mir_body)
+        .filter_map(|coverage| match coverage.kind {
+            CoverageKind::Counter { id } => Some(id),
+            _ => None,
+        })
+        .max()
+        .unwrap_or(CounterId::START);
 
-    // Add 1 to the highest IDs to get the total number of IDs.
-    CoverageInfo {
-        num_counters: (coverage_visitor.max_counter_id + 1).as_u32(),
-        num_expressions: (coverage_visitor.max_expression_id + 1).as_u32(),
-    }
+    CoverageIdsInfo { max_counter_id }
 }
 
 fn covered_code_regions(tcx: TyCtxt<'_>, def_id: DefId) -> Vec<&CodeRegion> {