diff options
| author | Stuart Cook <Zalathar@users.noreply.github.com> | 2025-09-01 17:35:03 +1000 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-01 17:35:03 +1000 | 
| commit | 88c254f8d4bf6ee21cd39582d5974915d71c42b3 (patch) | |
| tree | ff3b6129e68e8d466033552859db18b942de9fc4 /compiler/rustc_mir_transform/src | |
| parent | 1d520e26943d884d1d32d8ec4731d28cc4a14ec5 (diff) | |
| parent | c2eb45b4a176dba37a0bce8fa9c4d46cf0d55e24 (diff) | |
| download | rust-88c254f8d4bf6ee21cd39582d5974915d71c42b3.tar.gz rust-88c254f8d4bf6ee21cd39582d5974915d71c42b3.zip  | |
Rollup merge of #145643 - Zalathar:tree, r=SparrowLii
coverage: Build an "expansion tree" and use it to unexpand raw spans Historically and currently, coverage instrumentation assumes that all of a function's spans are in the same file and have the same syntax context. The spans extracted directly from MIR don't satisfy that assumption, so there is an “unexpansion” step that walks up each span's expansion-call-site tree to find a suitable span in the same context as the function's body span. (That unexpansion step is what allows us to have somewhat reasonable coverage instrumentation for macros like `println!`, and for syntax like `for` and `?` that undergo desugaring expansion.) The current unexpansion code mostly works fine in that “flat” single-file single-context world. But it's not suitable for incremental work towards proper expansion-aware coverage instrumentation, which would allow a function's coverage spans to encompass multiple expansion contexts and multiple files. This PR therefore replaces the current unexpansion code with a more sophisticated system that uses the raw MIR spans to reconstruct an “expansion tree”, and then uses that tree to help perform most of the unexpansion work. Building the tree is “overkill” for current unexpansion needs (though it does give some minor edge-case improvements), but my hope is that having the explicit tree available will be a big help when taking the next steps towards proper expansion-region support.
Diffstat (limited to 'compiler/rustc_mir_transform/src')
| -rw-r--r-- | compiler/rustc_mir_transform/src/coverage/expansion.rs | 127 | ||||
| -rw-r--r-- | compiler/rustc_mir_transform/src/coverage/mod.rs | 1 | ||||
| -rw-r--r-- | compiler/rustc_mir_transform/src/coverage/spans.rs | 142 | ||||
| -rw-r--r-- | compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs | 34 | ||||
| -rw-r--r-- | compiler/rustc_mir_transform/src/coverage/unexpand.rs | 48 | ||||
| -rw-r--r-- | compiler/rustc_mir_transform/src/lib.rs | 1 | 
6 files changed, 212 insertions, 141 deletions
diff --git a/compiler/rustc_mir_transform/src/coverage/expansion.rs b/compiler/rustc_mir_transform/src/coverage/expansion.rs new file mode 100644 index 00000000000..91e0528f52f --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/expansion.rs @@ -0,0 +1,127 @@ +use rustc_data_structures::fx::{FxIndexMap, FxIndexSet, IndexEntry}; +use rustc_middle::mir::coverage::BasicCoverageBlock; +use rustc_span::{ExpnId, ExpnKind, Span}; + +#[derive(Clone, Copy, Debug)] +pub(crate) struct SpanWithBcb { + pub(crate) span: Span, + pub(crate) bcb: BasicCoverageBlock, +} + +#[derive(Debug)] +pub(crate) struct ExpnTree { + nodes: FxIndexMap<ExpnId, ExpnNode>, +} + +impl ExpnTree { + pub(crate) fn get(&self, expn_id: ExpnId) -> Option<&ExpnNode> { + self.nodes.get(&expn_id) + } + + /// Yields the tree node for the given expansion ID (if present), followed + /// by the nodes of all of its descendants in depth-first order. + pub(crate) fn iter_node_and_descendants( + &self, + root_expn_id: ExpnId, + ) -> impl Iterator<Item = &ExpnNode> { + gen move { + let Some(root_node) = self.get(root_expn_id) else { return }; + yield root_node; + + // Stack of child-node-ID iterators that drives the depth-first traversal. + let mut iter_stack = vec![root_node.child_expn_ids.iter()]; + + while let Some(curr_iter) = iter_stack.last_mut() { + // Pull the next ID from the top of the stack. + let Some(&curr_id) = curr_iter.next() else { + iter_stack.pop(); + continue; + }; + + // Yield this node. + let Some(node) = self.get(curr_id) else { continue }; + yield node; + + // Push the node's children, to be traversed next. + if !node.child_expn_ids.is_empty() { + iter_stack.push(node.child_expn_ids.iter()); + } + } + } + } +} + +#[derive(Debug)] +pub(crate) struct ExpnNode { + /// Storing the expansion ID in its own node is not strictly necessary, + /// but is helpful for debugging and might be useful later. + #[expect(dead_code)] + pub(crate) expn_id: ExpnId, + + // Useful info extracted from `ExpnData`. + pub(crate) expn_kind: ExpnKind, + /// Non-dummy `ExpnData::call_site` span. + pub(crate) call_site: Option<Span>, + /// Expansion ID of `call_site`, if present. + /// This links an expansion node to its parent in the tree. + pub(crate) call_site_expn_id: Option<ExpnId>, + + /// Spans (and their associated BCBs) belonging to this expansion. + pub(crate) spans: Vec<SpanWithBcb>, + /// Expansions whose call-site is in this expansion. + pub(crate) child_expn_ids: FxIndexSet<ExpnId>, +} + +impl ExpnNode { + fn new(expn_id: ExpnId) -> Self { + let expn_data = expn_id.expn_data(); + + let call_site = Some(expn_data.call_site).filter(|sp| !sp.is_dummy()); + let call_site_expn_id = try { call_site?.ctxt().outer_expn() }; + + Self { + expn_id, + + expn_kind: expn_data.kind.clone(), + call_site, + call_site_expn_id, + + spans: vec![], + child_expn_ids: FxIndexSet::default(), + } + } +} + +/// Given a collection of span/BCB pairs from potentially-different syntax contexts, +/// arranges them into an "expansion tree" based on their expansion call-sites. +pub(crate) fn build_expn_tree(spans: impl IntoIterator<Item = SpanWithBcb>) -> ExpnTree { + let mut nodes = FxIndexMap::default(); + let new_node = |&expn_id: &ExpnId| ExpnNode::new(expn_id); + + for span_with_bcb in spans { + // Create a node for this span's enclosing expansion, and add the span to it. + let expn_id = span_with_bcb.span.ctxt().outer_expn(); + let node = nodes.entry(expn_id).or_insert_with_key(new_node); + node.spans.push(span_with_bcb); + + // Now walk up the expansion call-site chain, creating nodes and registering children. + let mut prev = expn_id; + let mut curr_expn_id = node.call_site_expn_id; + while let Some(expn_id) = curr_expn_id { + let entry = nodes.entry(expn_id); + let node_existed = matches!(entry, IndexEntry::Occupied(_)); + + let node = entry.or_insert_with_key(new_node); + node.child_expn_ids.insert(prev); + + if node_existed { + break; + } + + prev = expn_id; + curr_expn_id = node.call_site_expn_id; + } + } + + ExpnTree { nodes } +} diff --git a/compiler/rustc_mir_transform/src/coverage/mod.rs b/compiler/rustc_mir_transform/src/coverage/mod.rs index c5fef299244..08c7d346009 100644 --- a/compiler/rustc_mir_transform/src/coverage/mod.rs +++ b/compiler/rustc_mir_transform/src/coverage/mod.rs @@ -8,6 +8,7 @@ use crate::coverage::graph::CoverageGraph; use crate::coverage::mappings::ExtractedMappings; mod counters; +mod expansion; mod graph; mod hir_info; mod mappings; diff --git a/compiler/rustc_mir_transform/src/coverage/spans.rs b/compiler/rustc_mir_transform/src/coverage/spans.rs index d1b04c8f587..325935ee846 100644 --- a/compiler/rustc_mir_transform/src/coverage/spans.rs +++ b/compiler/rustc_mir_transform/src/coverage/spans.rs @@ -1,15 +1,14 @@ -use rustc_data_structures::fx::FxHashSet; use rustc_middle::mir; use rustc_middle::mir::coverage::{Mapping, MappingKind, START_BCB}; use rustc_middle::ty::TyCtxt; use rustc_span::source_map::SourceMap; -use rustc_span::{BytePos, DesugaringKind, ExpnKind, MacroKind, Span}; +use rustc_span::{BytePos, DesugaringKind, ExpnId, ExpnKind, MacroKind, Span}; use tracing::instrument; +use crate::coverage::expansion::{self, ExpnTree, SpanWithBcb}; use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph}; use crate::coverage::hir_info::ExtractedHirInfo; -use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir, SpanFromMir}; -use crate::coverage::unexpand; +use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir}; mod from_mir; @@ -34,19 +33,51 @@ pub(super) fn extract_refined_covspans<'tcx>( let &ExtractedHirInfo { body_span, .. } = hir_info; let raw_spans = from_mir::extract_raw_spans_from_mir(mir_body, graph); - let mut covspans = raw_spans - .into_iter() - .filter_map(|RawSpanFromMir { raw_span, bcb }| try { - let (span, expn_kind) = - unexpand::unexpand_into_body_span_with_expn_kind(raw_span, body_span)?; - // Discard any spans that fill the entire body, because they tend - // to represent compiler-inserted code, e.g. implicitly returning `()`. - if span.source_equal(body_span) { - return None; - }; - SpanFromMir { span, expn_kind, bcb } - }) - .collect::<Vec<_>>(); + // Use the raw spans to build a tree of expansions for this function. + let expn_tree = expansion::build_expn_tree( + raw_spans + .into_iter() + .map(|RawSpanFromMir { raw_span, bcb }| SpanWithBcb { span: raw_span, bcb }), + ); + + let mut covspans = vec![]; + let mut push_covspan = |covspan: Covspan| { + let covspan_span = covspan.span; + // Discard any spans not contained within the function body span. + // Also discard any spans that fill the entire body, because they tend + // to represent compiler-inserted code, e.g. implicitly returning `()`. + if !body_span.contains(covspan_span) || body_span.source_equal(covspan_span) { + return; + } + + // Each pushed covspan should have the same context as the body span. + // If it somehow doesn't, discard the covspan, or panic in debug builds. + if !body_span.eq_ctxt(covspan_span) { + debug_assert!( + false, + "span context mismatch: body_span={body_span:?}, covspan.span={covspan_span:?}" + ); + return; + } + + covspans.push(covspan); + }; + + if let Some(node) = expn_tree.get(body_span.ctxt().outer_expn()) { + for &SpanWithBcb { span, bcb } in &node.spans { + push_covspan(Covspan { span, bcb }); + } + + // For each expansion with its call-site in the body span, try to + // distill a corresponding covspan. + for &child_expn_id in &node.child_expn_ids { + if let Some(covspan) = + single_covspan_for_child_expn(tcx, graph, &expn_tree, child_expn_id) + { + push_covspan(covspan); + } + } + } // Only proceed if we found at least one usable span. if covspans.is_empty() { @@ -57,17 +88,10 @@ pub(super) fn extract_refined_covspans<'tcx>( // Otherwise, add a fake span at the start of the body, to avoid an ugly // gap between the start of the body and the first real span. // FIXME: Find a more principled way to solve this problem. - covspans.push(SpanFromMir::for_fn_sig( - hir_info.fn_sig_span.unwrap_or_else(|| body_span.shrink_to_lo()), - )); - - // First, perform the passes that need macro information. - covspans.sort_by(|a, b| graph.cmp_in_dominator_order(a.bcb, b.bcb)); - remove_unwanted_expansion_spans(&mut covspans); - shrink_visible_macro_spans(tcx, &mut covspans); - - // We no longer need the extra information in `SpanFromMir`, so convert to `Covspan`. - let mut covspans = covspans.into_iter().map(SpanFromMir::into_covspan).collect::<Vec<_>>(); + covspans.push(Covspan { + span: hir_info.fn_sig_span.unwrap_or_else(|| body_span.shrink_to_lo()), + bcb: START_BCB, + }); let compare_covspans = |a: &Covspan, b: &Covspan| { compare_spans(a.span, b.span) @@ -117,43 +141,37 @@ pub(super) fn extract_refined_covspans<'tcx>( })); } -/// Macros that expand into branches (e.g. `assert!`, `trace!`) tend to generate -/// multiple condition/consequent blocks that have the span of the whole macro -/// invocation, which is unhelpful. Keeping only the first such span seems to -/// give better mappings, so remove the others. -/// -/// Similarly, `await` expands to a branch on the discriminant of `Poll`, which -/// leads to incorrect coverage if the `Future` is immediately ready (#98712). -/// -/// (The input spans should be sorted in BCB dominator order, so that the -/// retained "first" span is likely to dominate the others.) -fn remove_unwanted_expansion_spans(covspans: &mut Vec<SpanFromMir>) { - let mut deduplicated_spans = FxHashSet::default(); - - covspans.retain(|covspan| { - match covspan.expn_kind { - // Retain only the first await-related or macro-expanded covspan with this span. - Some(ExpnKind::Desugaring(DesugaringKind::Await)) => { - deduplicated_spans.insert(covspan.span) - } - Some(ExpnKind::Macro(MacroKind::Bang, _)) => deduplicated_spans.insert(covspan.span), - // Ignore (retain) other spans. - _ => true, +/// For a single child expansion, try to distill it into a single span+BCB mapping. +fn single_covspan_for_child_expn( + tcx: TyCtxt<'_>, + graph: &CoverageGraph, + expn_tree: &ExpnTree, + expn_id: ExpnId, +) -> Option<Covspan> { + let node = expn_tree.get(expn_id)?; + + let bcbs = + expn_tree.iter_node_and_descendants(expn_id).flat_map(|n| n.spans.iter().map(|s| s.bcb)); + + let bcb = match node.expn_kind { + // For bang-macros (e.g. `assert!`, `trace!`) and for `await`, taking + // the "first" BCB in dominator order seems to give good results. + ExpnKind::Macro(MacroKind::Bang, _) | ExpnKind::Desugaring(DesugaringKind::Await) => { + bcbs.min_by(|&a, &b| graph.cmp_in_dominator_order(a, b))? } - }); -} - -/// When a span corresponds to a macro invocation that is visible from the -/// function body, truncate it to just the macro name plus `!`. -/// This seems to give better results for code that uses macros. -fn shrink_visible_macro_spans(tcx: TyCtxt<'_>, covspans: &mut Vec<SpanFromMir>) { - let source_map = tcx.sess.source_map(); + // For other kinds of expansion, taking the "last" (most-dominated) BCB + // seems to give good results. + _ => bcbs.max_by(|&a, &b| graph.cmp_in_dominator_order(a, b))?, + }; - for covspan in covspans { - if matches!(covspan.expn_kind, Some(ExpnKind::Macro(MacroKind::Bang, _))) { - covspan.span = source_map.span_through_char(covspan.span, '!'); - } + // For bang-macro expansions, limit the call-site span to just the macro + // name plus `!`, excluding the macro arguments. + let mut span = node.call_site?; + if matches!(node.expn_kind, ExpnKind::Macro(MacroKind::Bang, _)) { + span = tcx.sess.source_map().span_through_char(span, '!'); } + + Some(Covspan { span, bcb }) } /// Discard all covspans that overlap a hole. diff --git a/compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs b/compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs index 7985e1c0798..dfeaa90dc2e 100644 --- a/compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs +++ b/compiler/rustc_mir_transform/src/coverage/spans/from_mir.rs @@ -5,10 +5,9 @@ use rustc_middle::mir::coverage::CoverageKind; use rustc_middle::mir::{ self, FakeReadCause, Statement, StatementKind, Terminator, TerminatorKind, }; -use rustc_span::{ExpnKind, Span}; +use rustc_span::Span; -use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph, START_BCB}; -use crate::coverage::spans::Covspan; +use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph}; #[derive(Debug)] pub(crate) struct RawSpanFromMir { @@ -160,32 +159,3 @@ impl Hole { true } } - -#[derive(Debug)] -pub(crate) struct SpanFromMir { - /// A span that has been extracted from MIR and then "un-expanded" back to - /// within the current function's `body_span`. After various intermediate - /// processing steps, this span is emitted as part of the final coverage - /// mappings. - /// - /// With the exception of `fn_sig_span`, this should always be contained - /// within `body_span`. - pub(crate) span: Span, - pub(crate) expn_kind: Option<ExpnKind>, - pub(crate) bcb: BasicCoverageBlock, -} - -impl SpanFromMir { - pub(crate) fn for_fn_sig(fn_sig_span: Span) -> Self { - Self::new(fn_sig_span, None, START_BCB) - } - - pub(crate) fn new(span: Span, expn_kind: Option<ExpnKind>, bcb: BasicCoverageBlock) -> Self { - Self { span, expn_kind, bcb } - } - - pub(crate) fn into_covspan(self) -> Covspan { - let Self { span, expn_kind: _, bcb } = self; - Covspan { span, bcb } - } -} diff --git a/compiler/rustc_mir_transform/src/coverage/unexpand.rs b/compiler/rustc_mir_transform/src/coverage/unexpand.rs index cb861544736..922edd3cc4f 100644 --- a/compiler/rustc_mir_transform/src/coverage/unexpand.rs +++ b/compiler/rustc_mir_transform/src/coverage/unexpand.rs @@ -1,4 +1,4 @@ -use rustc_span::{ExpnKind, Span}; +use rustc_span::Span; /// Walks through the expansion ancestors of `original_span` to find a span that /// is contained in `body_span` and has the same [syntax context] as `body_span`. @@ -7,49 +7,3 @@ pub(crate) fn unexpand_into_body_span(original_span: Span, body_span: Span) -> O // we can just delegate directly to `find_ancestor_inside_same_ctxt`. original_span.find_ancestor_inside_same_ctxt(body_span) } - -/// Walks through the expansion ancestors of `original_span` to find a span that -/// is contained in `body_span` and has the same [syntax context] as `body_span`. -/// -/// If the returned span represents a bang-macro invocation (e.g. `foo!(..)`), -/// the returned symbol will be the name of that macro (e.g. `foo`). -pub(crate) fn unexpand_into_body_span_with_expn_kind( - original_span: Span, - body_span: Span, -) -> Option<(Span, Option<ExpnKind>)> { - let (span, prev) = unexpand_into_body_span_with_prev(original_span, body_span)?; - - let expn_kind = prev.map(|prev| prev.ctxt().outer_expn_data().kind); - - Some((span, expn_kind)) -} - -/// Walks through the expansion ancestors of `original_span` to find a span that -/// is contained in `body_span` and has the same [syntax context] as `body_span`. -/// The ancestor that was traversed just before the matching span (if any) is -/// also returned. -/// -/// For example, a return value of `Some((ancestor, Some(prev)))` means that: -/// - `ancestor == original_span.find_ancestor_inside_same_ctxt(body_span)` -/// - `prev.parent_callsite() == ancestor` -/// -/// [syntax context]: rustc_span::SyntaxContext -fn unexpand_into_body_span_with_prev( - original_span: Span, - body_span: Span, -) -> Option<(Span, Option<Span>)> { - let mut prev = None; - let mut curr = original_span; - - while !body_span.contains(curr) || !curr.eq_ctxt(body_span) { - prev = Some(curr); - curr = curr.parent_callsite()?; - } - - debug_assert_eq!(Some(curr), original_span.find_ancestor_inside_same_ctxt(body_span)); - if let Some(prev) = prev { - debug_assert_eq!(Some(curr), prev.parent_callsite()); - } - - Some((curr, prev)) -} diff --git a/compiler/rustc_mir_transform/src/lib.rs b/compiler/rustc_mir_transform/src/lib.rs index 08f25276cec..8f319e64916 100644 --- a/compiler/rustc_mir_transform/src/lib.rs +++ b/compiler/rustc_mir_transform/src/lib.rs @@ -5,6 +5,7 @@ #![feature(const_type_name)] #![feature(cow_is_borrowed)] #![feature(file_buffered)] +#![feature(gen_blocks)] #![feature(if_let_guard)] #![feature(impl_trait_in_assoc_type)] #![feature(try_blocks)]  | 
