about summary refs log tree commit diff
path: root/compiler/rustc_mir/src/transform/coverage/graph.rs
diff options
context:
space:
mode:
authorRich Kadel <richkadel@google.com>2020-10-23 00:45:07 -0700
committerRich Kadel <richkadel@google.com>2020-11-05 18:24:12 -0800
commitc7ae4c2cb6d7e58ad0f9c12047e3d747c26a9d71 (patch)
treefdbb5014c0353a7a29a61b07364575b04d81362a /compiler/rustc_mir/src/transform/coverage/graph.rs
parentc7747cc772f4b4c30ede3616678d7a3bc2c89bf7 (diff)
downloadrust-c7ae4c2cb6d7e58ad0f9c12047e3d747c26a9d71.tar.gz
rust-c7ae4c2cb6d7e58ad0f9c12047e3d747c26a9d71.zip
Splitting transform/instrument_coverage.rs into transform/coverage/...
Diffstat (limited to 'compiler/rustc_mir/src/transform/coverage/graph.rs')
-rw-r--r--compiler/rustc_mir/src/transform/coverage/graph.rs274
1 files changed, 274 insertions, 0 deletions
diff --git a/compiler/rustc_mir/src/transform/coverage/graph.rs b/compiler/rustc_mir/src/transform/coverage/graph.rs
new file mode 100644
index 00000000000..c0a698833a1
--- /dev/null
+++ b/compiler/rustc_mir/src/transform/coverage/graph.rs
@@ -0,0 +1,274 @@
+use rustc_index::bit_set::BitSet;
+use rustc_index::vec::IndexVec;
+use rustc_middle::mir::{self, BasicBlock, BasicBlockData, TerminatorKind};
+
+const ID_SEPARATOR: &str = ",";
+
+/// A BasicCoverageBlock (BCB) represents the maximal-length sequence of CFG (MIR) BasicBlocks
+/// without conditional branches.
+///
+/// The BCB allows coverage analysis to be performed on a simplified projection of the underlying
+/// MIR CFG, without altering the original CFG. Note that running the MIR `SimplifyCfg` transform,
+/// is not sufficient, and therefore not necessary, since the BCB-based CFG projection is a more
+/// aggressive simplification. For example:
+///
+///   * The BCB CFG projection ignores (trims) branches not relevant to coverage, such as unwind-
+///     related code that is injected by the Rust compiler but has no physical source code to
+///     count. This also means a BasicBlock with a `Call` terminator can be merged into its
+///     primary successor target block, in the same BCB.
+///   * Some BasicBlock terminators support Rust-specific concerns--like borrow-checking--that are
+///     not relevant to coverage analysis. `FalseUnwind`, for example, can be treated the same as
+///     a `Goto`, and merged with its successor into the same BCB.
+///
+/// Each BCB with at least one computed `CoverageSpan` will have no more than one `Counter`.
+/// In some cases, a BCB's execution count can be computed by `CounterExpression`. Additional
+/// disjoint `CoverageSpan`s in a BCB can also be counted by `CounterExpression` (by adding `ZERO`
+/// to the BCB's primary counter or expression).
+///
+/// Dominator/dominated relationships (which are fundamental to the coverage analysis algorithm)
+/// between two BCBs can be computed using the `mir::Body` `dominators()` with any `BasicBlock`
+/// member of each BCB. (For consistency, BCB's use the first `BasicBlock`, also referred to as the
+/// `bcb_leader_bb`.)
+///
+/// The BCB CFG projection is critical to simplifying the coverage analysis by ensuring graph
+/// path-based queries (`is_dominated_by()`, `predecessors`, `successors`, etc.) have branch
+/// (control flow) significance.
+#[derive(Debug, Clone)]
+pub(crate) struct BasicCoverageBlock {
+    pub blocks: Vec<BasicBlock>,
+}
+
+impl BasicCoverageBlock {
+    pub fn leader_bb(&self) -> BasicBlock {
+        self.blocks[0]
+    }
+
+    pub fn id(&self) -> String {
+        format!(
+            "@{}",
+            self.blocks
+                .iter()
+                .map(|bb| bb.index().to_string())
+                .collect::<Vec<_>>()
+                .join(ID_SEPARATOR)
+        )
+    }
+}
+
+pub(crate) struct BasicCoverageBlocks {
+    vec: IndexVec<BasicBlock, Option<BasicCoverageBlock>>,
+}
+
+impl BasicCoverageBlocks {
+    pub fn from_mir(mir_body: &mir::Body<'tcx>) -> Self {
+        let mut basic_coverage_blocks =
+            BasicCoverageBlocks { vec: IndexVec::from_elem_n(None, mir_body.basic_blocks().len()) };
+        basic_coverage_blocks.extract_from_mir(mir_body);
+        basic_coverage_blocks
+    }
+
+    pub fn iter(&self) -> impl Iterator<Item = &BasicCoverageBlock> {
+        self.vec.iter().filter_map(|bcb| bcb.as_ref())
+    }
+
+    pub fn num_nodes(&self) -> usize {
+        self.vec.len()
+    }
+
+    pub fn extract_from_mir(&mut self, mir_body: &mir::Body<'tcx>) {
+        // Traverse the CFG but ignore anything following an `unwind`
+        let cfg_without_unwind = ShortCircuitPreorder::new(&mir_body, |term_kind| {
+            let mut successors = term_kind.successors();
+            match &term_kind {
+                // SwitchInt successors are never unwind, and all of them should be traversed.
+
+                // NOTE: TerminatorKind::FalseEdge targets from SwitchInt don't appear to be
+                // helpful in identifying unreachable code. I did test the theory, but the following
+                // changes were not beneficial. (I assumed that replacing some constants with
+                // non-deterministic variables might effect which blocks were targeted by a
+                // `FalseEdge` `imaginary_target`. It did not.)
+                //
+                // Also note that, if there is a way to identify BasicBlocks that are part of the
+                // MIR CFG, but not actually reachable, here are some other things to consider:
+                //
+                // Injecting unreachable code regions will probably require computing the set
+                // difference between the basic blocks found without filtering out unreachable
+                // blocks, and the basic blocks found with the filter; then computing the
+                // `CoverageSpans` without the filter; and then injecting `Counter`s or
+                // `CounterExpression`s for blocks that are not unreachable, or injecting
+                // `Unreachable` code regions otherwise. This seems straightforward, but not
+                // trivial.
+                //
+                // Alternatively, we might instead want to leave the unreachable blocks in
+                // (bypass the filter here), and inject the counters. This will result in counter
+                // values of zero (0) for unreachable code (and, notably, the code will be displayed
+                // with a red background by `llvm-cov show`).
+                //
+                // TerminatorKind::SwitchInt { .. } => {
+                //     let some_imaginary_target = successors.clone().find_map(|&successor| {
+                //         let term = mir_body.basic_blocks()[successor].terminator();
+                //         if let TerminatorKind::FalseEdge { imaginary_target, .. } = term.kind {
+                //             if mir_body.predecessors()[imaginary_target].len() == 1 {
+                //                 return Some(imaginary_target);
+                //             }
+                //         }
+                //         None
+                //     });
+                //     if let Some(imaginary_target) = some_imaginary_target {
+                //         box successors.filter(move |&&successor| successor != imaginary_target)
+                //     } else {
+                //         box successors
+                //     }
+                // }
+                //
+                // Note this also required changing the closure signature for the
+                // `ShortCurcuitPreorder` to:
+                //
+                // F: Fn(&'tcx TerminatorKind<'tcx>) -> Box<dyn Iterator<Item = &BasicBlock> + 'a>,
+                TerminatorKind::SwitchInt { .. } => successors,
+
+                // For all other kinds, return only the first successor, if any, and ignore unwinds
+                _ => successors.next().into_iter().chain(&[]),
+            }
+        });
+
+        // Walk the CFG using a Preorder traversal, which starts from `START_BLOCK` and follows
+        // each block terminator's `successors()`. Coverage spans must map to actual source code,
+        // so compiler generated blocks and paths can be ignored. To that end the CFG traversal
+        // intentionally omits unwind paths.
+        let mut blocks = Vec::new();
+        for (bb, data) in cfg_without_unwind {
+            if let Some(last) = blocks.last() {
+                let predecessors = &mir_body.predecessors()[bb];
+                if predecessors.len() > 1 || !predecessors.contains(last) {
+                    // The `bb` has more than one _incoming_ edge, and should start its own
+                    // `BasicCoverageBlock`. (Note, the `blocks` vector does not yet include `bb`;
+                    // it contains a sequence of one or more sequential blocks with no intermediate
+                    // branches in or out. Save these as a new `BasicCoverageBlock` before starting
+                    // the new one.)
+                    self.add_basic_coverage_block(blocks.split_off(0));
+                    debug!(
+                        "  because {}",
+                        if predecessors.len() > 1 {
+                            "predecessors.len() > 1".to_owned()
+                        } else {
+                            format!("bb {} is not in precessors: {:?}", bb.index(), predecessors)
+                        }
+                    );
+                }
+            }
+            blocks.push(bb);
+
+            let term = data.terminator();
+
+            match term.kind {
+                TerminatorKind::Return { .. }
+                | TerminatorKind::Abort
+                | TerminatorKind::Assert { .. }
+                | TerminatorKind::Yield { .. }
+                | TerminatorKind::SwitchInt { .. } => {
+                    // The `bb` has more than one _outgoing_ edge, or exits the function. Save the
+                    // current sequence of `blocks` gathered to this point, as a new
+                    // `BasicCoverageBlock`.
+                    self.add_basic_coverage_block(blocks.split_off(0));
+                    debug!("  because term.kind = {:?}", term.kind);
+                    // Note that this condition is based on `TerminatorKind`, even though it
+                    // theoretically boils down to `successors().len() != 1`; that is, either zero
+                    // (e.g., `Return`, `Abort`) or multiple successors (e.g., `SwitchInt`), but
+                    // since the Coverage graph (the BCB CFG projection) ignores things like unwind
+                    // branches (which exist in the `Terminator`s `successors()` list) checking the
+                    // number of successors won't work.
+                }
+                TerminatorKind::Goto { .. }
+                | TerminatorKind::Resume
+                | TerminatorKind::Unreachable
+                | TerminatorKind::Drop { .. }
+                | TerminatorKind::DropAndReplace { .. }
+                | TerminatorKind::Call { .. }
+                | TerminatorKind::GeneratorDrop
+                | TerminatorKind::FalseEdge { .. }
+                | TerminatorKind::FalseUnwind { .. }
+                | TerminatorKind::InlineAsm { .. } => {}
+            }
+        }
+
+        if !blocks.is_empty() {
+            // process any remaining blocks into a final `BasicCoverageBlock`
+            self.add_basic_coverage_block(blocks.split_off(0));
+            debug!("  because the end of the CFG was reached while traversing");
+        }
+    }
+
+    fn add_basic_coverage_block(&mut self, blocks: Vec<BasicBlock>) {
+        let leader_bb = blocks[0];
+        let bcb = BasicCoverageBlock { blocks };
+        debug!("adding BCB: {:?}", bcb);
+        self.vec[leader_bb] = Some(bcb);
+    }
+}
+
+impl std::ops::Index<BasicBlock> for BasicCoverageBlocks {
+    type Output = BasicCoverageBlock;
+
+    fn index(&self, index: BasicBlock) -> &Self::Output {
+        self.vec[index].as_ref().expect("is_some if BasicBlock is a BasicCoverageBlock leader")
+    }
+}
+
+pub struct ShortCircuitPreorder<
+    'a,
+    'tcx,
+    F: Fn(&'tcx TerminatorKind<'tcx>) -> mir::Successors<'tcx>,
+> {
+    body: &'a mir::Body<'tcx>,
+    visited: BitSet<BasicBlock>,
+    worklist: Vec<BasicBlock>,
+    filtered_successors: F,
+}
+
+impl<'a, 'tcx, F: Fn(&'tcx TerminatorKind<'tcx>) -> mir::Successors<'tcx>>
+    ShortCircuitPreorder<'a, 'tcx, F>
+{
+    pub fn new(
+        body: &'a mir::Body<'tcx>,
+        filtered_successors: F,
+    ) -> ShortCircuitPreorder<'a, 'tcx, F> {
+        let worklist = vec![mir::START_BLOCK];
+
+        ShortCircuitPreorder {
+            body,
+            visited: BitSet::new_empty(body.basic_blocks().len()),
+            worklist,
+            filtered_successors,
+        }
+    }
+}
+
+impl<'a: 'tcx, 'tcx, F: Fn(&'tcx TerminatorKind<'tcx>) -> mir::Successors<'tcx>> Iterator
+    for ShortCircuitPreorder<'a, 'tcx, F>
+{
+    type Item = (BasicBlock, &'a BasicBlockData<'tcx>);
+
+    fn next(&mut self) -> Option<(BasicBlock, &'a BasicBlockData<'tcx>)> {
+        while let Some(idx) = self.worklist.pop() {
+            if !self.visited.insert(idx) {
+                continue;
+            }
+
+            let data = &self.body[idx];
+
+            if let Some(ref term) = data.terminator {
+                self.worklist.extend((self.filtered_successors)(&term.kind));
+            }
+
+            return Some((idx, data));
+        }
+
+        None
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let size = self.body.basic_blocks().len() - self.visited.count();
+        (size, Some(size))
+    }
+}