diff options
| author | bors <bors@rust-lang.org> | 2023-09-05 15:30:59 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2023-09-05 15:30:59 +0000 |
| commit | ab45885dec2a6552cb060a5b7183653baaecd580 (patch) | |
| tree | cbc89120fae2ede8bdc74e158fb7724cbe567852 /src | |
| parent | f222a2dd8f6391e6433f57a7c5f1514166edbec1 (diff) | |
| parent | 3141177995f52fc3cbb64d66cf0a98ea0f754fca (diff) | |
| download | rust-ab45885dec2a6552cb060a5b7183653baaecd580.tar.gz rust-ab45885dec2a6552cb060a5b7183653baaecd580.zip | |
Auto merge of #114843 - Zalathar:test-coverage-map, r=oli-obk
coverage: Explicitly test the coverage maps produced by codegen/LLVM Our existing coverage tests verify the output of end-to-end coverage reports, but we don't have any way to test the specific mapping information (code regions and their associated counters) that are emitted by `rustc_codegen_llvm` and LLVM. That makes it harder to to be confident in changes that would modify those mappings (whether deliberately or accidentally). This PR addresses that by adding a new `coverage-map` test suite that does the following: - Compiles test files to LLVM IR assembly (`.ll`) - Feeds those IR files to a custom tool (`src/tools/coverage-dump`) that extracts and decodes coverage mappings, and prints them in a more human-readable format - Checks the output of that tool against known-good snapshots --- I recommend excluding the last commit while reviewing the main changes, because that last commit is just ~40 test files copied over from `tests/run-coverage`, plus their blessed coverage-map snapshots and a readme file. Those snapshots aren't really intended to be checked by hand; they're mostly there to increase the chances that an unintended change to coverage maps will be observable (even if it requires relatively specific circumstances to manifest).
Diffstat (limited to 'src')
| -rw-r--r-- | src/bootstrap/builder.rs | 4 | ||||
| -rw-r--r-- | src/bootstrap/test.rs | 14 | ||||
| -rw-r--r-- | src/bootstrap/tool.rs | 1 | ||||
| -rw-r--r-- | src/tools/compiletest/src/common.rs | 6 | ||||
| -rw-r--r-- | src/tools/compiletest/src/lib.rs | 2 | ||||
| -rw-r--r-- | src/tools/compiletest/src/runtest.rs | 71 | ||||
| -rw-r--r-- | src/tools/coverage-dump/Cargo.toml | 14 | ||||
| -rw-r--r-- | src/tools/coverage-dump/README.md | 8 | ||||
| -rw-r--r-- | src/tools/coverage-dump/src/covfun.rs | 296 | ||||
| -rw-r--r-- | src/tools/coverage-dump/src/main.rs | 17 | ||||
| -rw-r--r-- | src/tools/coverage-dump/src/parser.rs | 80 | ||||
| -rw-r--r-- | src/tools/coverage-dump/src/parser/tests.rs | 38 | ||||
| -rw-r--r-- | src/tools/coverage-dump/src/prf_names.rs | 87 |
13 files changed, 632 insertions, 6 deletions
diff --git a/src/bootstrap/builder.rs b/src/bootstrap/builder.rs index b3666192853..a24a6a4636d 100644 --- a/src/bootstrap/builder.rs +++ b/src/bootstrap/builder.rs @@ -703,7 +703,8 @@ impl<'a> Builder<'a> { llvm::Lld, llvm::CrtBeginEnd, tool::RustdocGUITest, - tool::OptimizedDist + tool::OptimizedDist, + tool::CoverageDump, ), Kind::Check | Kind::Clippy | Kind::Fix => describe!( check::Std, @@ -725,6 +726,7 @@ impl<'a> Builder<'a> { test::Tidy, test::Ui, test::RunPassValgrind, + test::CoverageMap, test::RunCoverage, test::MirOpt, test::Codegen, diff --git a/src/bootstrap/test.rs b/src/bootstrap/test.rs index d1018978f78..d78e0deda69 100644 --- a/src/bootstrap/test.rs +++ b/src/bootstrap/test.rs @@ -1340,6 +1340,12 @@ host_test!(RunMakeFullDeps { default_test!(Assembly { path: "tests/assembly", mode: "assembly", suite: "assembly" }); +default_test!(CoverageMap { + path: "tests/coverage-map", + mode: "coverage-map", + suite: "coverage-map" +}); + host_test!(RunCoverage { path: "tests/run-coverage", mode: "run-coverage", suite: "run-coverage" }); host_test!(RunCoverageRustdoc { path: "tests/run-coverage-rustdoc", @@ -1545,6 +1551,14 @@ note: if you're sure you want to do this, please open an issue as to why. In the .arg(builder.ensure(tool::JsonDocLint { compiler: json_compiler, target })); } + if mode == "coverage-map" { + let coverage_dump = builder.ensure(tool::CoverageDump { + compiler: compiler.with_stage(0), + target: compiler.host, + }); + cmd.arg("--coverage-dump-path").arg(coverage_dump); + } + if mode == "run-make" || mode == "run-coverage" { let rust_demangler = builder .ensure(tool::RustDemangler { diff --git a/src/bootstrap/tool.rs b/src/bootstrap/tool.rs index 07ff3da6b4a..f094dd9d7c9 100644 --- a/src/bootstrap/tool.rs +++ b/src/bootstrap/tool.rs @@ -306,6 +306,7 @@ bootstrap_tool!( GenerateWindowsSys, "src/tools/generate-windows-sys", "generate-windows-sys"; RustdocGUITest, "src/tools/rustdoc-gui-test", "rustdoc-gui-test", is_unstable_tool = true, allow_features = "test"; OptimizedDist, "src/tools/opt-dist", "opt-dist"; + CoverageDump, "src/tools/coverage-dump", "coverage-dump"; ); #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] diff --git a/src/tools/compiletest/src/common.rs b/src/tools/compiletest/src/common.rs index 7c17e92d0df..b91d5a958bb 100644 --- a/src/tools/compiletest/src/common.rs +++ b/src/tools/compiletest/src/common.rs @@ -66,6 +66,7 @@ string_enum! { JsDocTest => "js-doc-test", MirOpt => "mir-opt", Assembly => "assembly", + CoverageMap => "coverage-map", RunCoverage => "run-coverage", } } @@ -161,6 +162,9 @@ pub struct Config { /// The rust-demangler executable. pub rust_demangler_path: Option<PathBuf>, + /// The coverage-dump executable. + pub coverage_dump_path: Option<PathBuf>, + /// The Python executable to use for LLDB and htmldocck. pub python: String, @@ -639,6 +643,7 @@ pub const UI_EXTENSIONS: &[&str] = &[ UI_STDERR_32, UI_STDERR_16, UI_COVERAGE, + UI_COVERAGE_MAP, ]; pub const UI_STDERR: &str = "stderr"; pub const UI_STDOUT: &str = "stdout"; @@ -649,6 +654,7 @@ pub const UI_STDERR_64: &str = "64bit.stderr"; pub const UI_STDERR_32: &str = "32bit.stderr"; pub const UI_STDERR_16: &str = "16bit.stderr"; pub const UI_COVERAGE: &str = "coverage"; +pub const UI_COVERAGE_MAP: &str = "cov-map"; /// Absolute path to the directory where all output for all tests in the given /// `relative_dir` group should reside. Example: diff --git a/src/tools/compiletest/src/lib.rs b/src/tools/compiletest/src/lib.rs index 1a765477fe5..619ff9b3221 100644 --- a/src/tools/compiletest/src/lib.rs +++ b/src/tools/compiletest/src/lib.rs @@ -48,6 +48,7 @@ pub fn parse_config(args: Vec<String>) -> Config { .reqopt("", "rustc-path", "path to rustc to use for compiling", "PATH") .optopt("", "rustdoc-path", "path to rustdoc to use for compiling", "PATH") .optopt("", "rust-demangler-path", "path to rust-demangler to use in tests", "PATH") + .optopt("", "coverage-dump-path", "path to coverage-dump to use in tests", "PATH") .reqopt("", "python", "path to python to use for doc tests", "PATH") .optopt("", "jsondocck-path", "path to jsondocck to use for doc tests", "PATH") .optopt("", "jsondoclint-path", "path to jsondoclint to use for doc tests", "PATH") @@ -218,6 +219,7 @@ pub fn parse_config(args: Vec<String>) -> Config { rustc_path: opt_path(matches, "rustc-path"), rustdoc_path: matches.opt_str("rustdoc-path").map(PathBuf::from), rust_demangler_path: matches.opt_str("rust-demangler-path").map(PathBuf::from), + coverage_dump_path: matches.opt_str("coverage-dump-path").map(PathBuf::from), python: matches.opt_str("python").unwrap(), jsondocck_path: matches.opt_str("jsondocck-path"), jsondoclint_path: matches.opt_str("jsondoclint-path"), diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs index dd4c59fdff5..670441aacbd 100644 --- a/src/tools/compiletest/src/runtest.rs +++ b/src/tools/compiletest/src/runtest.rs @@ -6,8 +6,8 @@ use crate::common::{Assembly, Incremental, JsDocTest, MirOpt, RunMake, RustdocJs use crate::common::{Codegen, CodegenUnits, DebugInfo, Debugger, Rustdoc}; use crate::common::{CompareMode, FailMode, PassMode}; use crate::common::{Config, TestPaths}; -use crate::common::{Pretty, RunCoverage, RunPassValgrind}; -use crate::common::{UI_COVERAGE, UI_RUN_STDERR, UI_RUN_STDOUT}; +use crate::common::{CoverageMap, Pretty, RunCoverage, RunPassValgrind}; +use crate::common::{UI_COVERAGE, UI_COVERAGE_MAP, UI_RUN_STDERR, UI_RUN_STDOUT}; use crate::compute_diff::{write_diff, write_filtered_diff}; use crate::errors::{self, Error, ErrorKind}; use crate::header::TestProps; @@ -254,6 +254,7 @@ impl<'test> TestCx<'test> { MirOpt => self.run_mir_opt_test(), Assembly => self.run_assembly_test(), JsDocTest => self.run_js_doc_test(), + CoverageMap => self.run_coverage_map_test(), RunCoverage => self.run_coverage_test(), } } @@ -467,6 +468,46 @@ impl<'test> TestCx<'test> { } } + fn run_coverage_map_test(&self) { + let Some(coverage_dump_path) = &self.config.coverage_dump_path else { + self.fatal("missing --coverage-dump"); + }; + + let proc_res = self.compile_test_and_save_ir(); + if !proc_res.status.success() { + self.fatal_proc_rec("compilation failed!", &proc_res); + } + drop(proc_res); + + let llvm_ir_path = self.output_base_name().with_extension("ll"); + + let mut dump_command = Command::new(coverage_dump_path); + dump_command.arg(llvm_ir_path); + let proc_res = self.run_command_to_procres(&mut dump_command); + if !proc_res.status.success() { + self.fatal_proc_rec("coverage-dump failed!", &proc_res); + } + + let kind = UI_COVERAGE_MAP; + + let expected_coverage_dump = self.load_expected_output(kind); + let actual_coverage_dump = self.normalize_output(&proc_res.stdout, &[]); + + let coverage_dump_errors = self.compare_output( + kind, + &actual_coverage_dump, + &expected_coverage_dump, + self.props.compare_output_lines_by_subset, + ); + + if coverage_dump_errors > 0 { + self.fatal_proc_rec( + &format!("{coverage_dump_errors} errors occurred comparing coverage output."), + &proc_res, + ); + } + } + fn run_coverage_test(&self) { let should_run = self.run_if_enabled(); let proc_res = self.compile_test(should_run, Emit::None); @@ -650,6 +691,10 @@ impl<'test> TestCx<'test> { let mut cmd = Command::new(tool_path); configure_cmd_fn(&mut cmd); + self.run_command_to_procres(&mut cmd) + } + + fn run_command_to_procres(&self, cmd: &mut Command) -> ProcRes { let output = cmd.output().unwrap_or_else(|_| panic!("failed to exec `{cmd:?}`")); let proc_res = ProcRes { @@ -2321,9 +2366,11 @@ impl<'test> TestCx<'test> { } } DebugInfo => { /* debuginfo tests must be unoptimized */ } - RunCoverage => { - // Coverage reports are affected by optimization level, and - // the current snapshots assume no optimization by default. + CoverageMap | RunCoverage => { + // Coverage mappings and coverage reports are affected by + // optimization level, so they ignore the optimize-tests + // setting and set an optimization level in their mode's + // compile flags (below) or in per-test `compile-flags`. } _ => { rustc.arg("-O"); @@ -2392,8 +2439,22 @@ impl<'test> TestCx<'test> { rustc.arg(dir_opt); } + CoverageMap => { + rustc.arg("-Cinstrument-coverage"); + // These tests only compile to MIR, so they don't need the + // profiler runtime to be present. + rustc.arg("-Zno-profiler-runtime"); + // Coverage mappings are sensitive to MIR optimizations, and + // the current snapshots assume `opt-level=2` unless overridden + // by `compile-flags`. + rustc.arg("-Copt-level=2"); + } RunCoverage => { rustc.arg("-Cinstrument-coverage"); + // Coverage reports are sometimes sensitive to optimizations, + // and the current snapshots assume no optimization unless + // overridden by `compile-flags`. + rustc.arg("-Copt-level=0"); } RunPassValgrind | Pretty | DebugInfo | Codegen | Rustdoc | RustdocJson | RunMake | CodegenUnits | JsDocTest | Assembly => { diff --git a/src/tools/coverage-dump/Cargo.toml b/src/tools/coverage-dump/Cargo.toml new file mode 100644 index 00000000000..7f14286b5d0 --- /dev/null +++ b/src/tools/coverage-dump/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "coverage-dump" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.71" +leb128 = "0.2.5" +md5 = { package = "md-5" , version = "0.10.5" } +miniz_oxide = "0.7.1" +regex = "1.8.4" +rustc-demangle = "0.1.23" diff --git a/src/tools/coverage-dump/README.md b/src/tools/coverage-dump/README.md new file mode 100644 index 00000000000..e2625d5adf2 --- /dev/null +++ b/src/tools/coverage-dump/README.md @@ -0,0 +1,8 @@ +This tool extracts coverage mapping information from an LLVM IR assembly file +(`.ll`), and prints it in a more human-readable form that can be used for +snapshot tests. + +The output format is mostly arbitrary, so it's OK to change the output as long +as any affected tests are also re-blessed. However, the output should be +consistent across different executions on different platforms, so avoid +printing any information that is platform-specific or non-deterministic. diff --git a/src/tools/coverage-dump/src/covfun.rs b/src/tools/coverage-dump/src/covfun.rs new file mode 100644 index 00000000000..3a5866dea3e --- /dev/null +++ b/src/tools/coverage-dump/src/covfun.rs @@ -0,0 +1,296 @@ +use crate::parser::{unescape_llvm_string_contents, Parser}; +use anyhow::{anyhow, Context}; +use regex::Regex; +use std::collections::HashMap; +use std::fmt::{self, Debug, Write as _}; +use std::sync::OnceLock; + +pub(crate) fn dump_covfun_mappings( + llvm_ir: &str, + function_names: &HashMap<u64, String>, +) -> anyhow::Result<()> { + // Extract function coverage entries from the LLVM IR assembly, and associate + // each entry with its (demangled) name. + let mut covfun_entries = llvm_ir + .lines() + .filter_map(covfun_line_data) + .map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data)) + .collect::<Vec<_>>(); + covfun_entries.sort_by(|a, b| { + // Sort entries primarily by name, to help make the order consistent + // across platforms and relatively insensitive to changes. + // (Sadly we can't use `sort_by_key` because we would need to return references.) + Ord::cmp(&a.0, &b.0) + .then_with(|| Ord::cmp(&a.1.is_used, &b.1.is_used)) + .then_with(|| Ord::cmp(a.1.payload.as_slice(), b.1.payload.as_slice())) + }); + + for (name, line_data) in &covfun_entries { + let name = name.unwrap_or("(unknown)"); + let unused = if line_data.is_used { "" } else { " (unused)" }; + println!("Function name: {name}{unused}"); + + let payload: &[u8] = &line_data.payload; + println!("Raw bytes ({len}): 0x{payload:02x?}", len = payload.len()); + + let mut parser = Parser::new(payload); + + let num_files = parser.read_uleb128_u32()?; + println!("Number of files: {num_files}"); + + for i in 0..num_files { + let global_file_id = parser.read_uleb128_u32()?; + println!("- file {i} => global file {global_file_id}"); + } + + let num_expressions = parser.read_uleb128_u32()?; + println!("Number of expressions: {num_expressions}"); + + let mut expression_resolver = ExpressionResolver::new(); + for i in 0..num_expressions { + let lhs = parser.read_simple_term()?; + let rhs = parser.read_simple_term()?; + println!("- expression {i} operands: lhs = {lhs:?}, rhs = {rhs:?}"); + expression_resolver.push_operands(lhs, rhs); + } + + for i in 0..num_files { + let num_mappings = parser.read_uleb128_u32()?; + println!("Number of file {i} mappings: {num_mappings}"); + + for _ in 0..num_mappings { + let (kind, region) = parser.read_mapping_kind_and_region()?; + println!("- {kind:?} at {region:?}"); + + match kind { + // Also print expression mappings in resolved form. + MappingKind::Code(term @ CovTerm::Expression { .. }) + | MappingKind::Gap(term @ CovTerm::Expression { .. }) => { + println!(" = {}", expression_resolver.format_term(term)); + } + // If the mapping is a branch region, print both of its arms + // in resolved form (even if they aren't expressions). + MappingKind::Branch { r#true, r#false } => { + println!(" true = {}", expression_resolver.format_term(r#true)); + println!(" false = {}", expression_resolver.format_term(r#false)); + } + _ => (), + } + } + } + + parser.ensure_empty()?; + println!(); + } + Ok(()) +} + +struct CovfunLineData { + name_hash: u64, + is_used: bool, + payload: Vec<u8>, +} + +/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun` +/// entry, and if so extracts relevant data in a `CovfunLineData`. +fn covfun_line_data(line: &str) -> Option<CovfunLineData> { + let re = { + // We cheat a little bit and match variable names `@__covrec_[HASH]u` + // rather than the section name, because the section name is harder to + // extract and differs across Linux/Windows/macOS. We also extract the + // symbol name hash from the variable name rather than the data, since + // it's easier and both should match. + static RE: OnceLock<Regex> = OnceLock::new(); + RE.get_or_init(|| { + Regex::new( + r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#, + ) + .unwrap() + }) + }; + + let captures = re.captures(line)?; + let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap(); + let is_used = captures.name("is_used").is_some(); + let payload = unescape_llvm_string_contents(&captures["payload"]); + + Some(CovfunLineData { name_hash, is_used, payload }) +} + +// Extra parser methods only needed when parsing `covfun` payloads. +impl<'a> Parser<'a> { + fn read_simple_term(&mut self) -> anyhow::Result<CovTerm> { + let raw_term = self.read_uleb128_u32()?; + CovTerm::decode(raw_term).context("decoding term") + } + + fn read_mapping_kind_and_region(&mut self) -> anyhow::Result<(MappingKind, MappingRegion)> { + let mut kind = self.read_raw_mapping_kind()?; + let mut region = self.read_raw_mapping_region()?; + + const HIGH_BIT: u32 = 1u32 << 31; + if region.end_column & HIGH_BIT != 0 { + region.end_column &= !HIGH_BIT; + kind = match kind { + MappingKind::Code(term) => MappingKind::Gap(term), + // LLVM's coverage mapping reader will actually handle this + // case without complaint, but the result is almost certainly + // a meaningless implementation artifact. + _ => return Err(anyhow!("unexpected base kind for gap region: {kind:?}")), + } + } + + Ok((kind, region)) + } + + fn read_raw_mapping_kind(&mut self) -> anyhow::Result<MappingKind> { + let raw_mapping_kind = self.read_uleb128_u32()?; + if let Some(term) = CovTerm::decode(raw_mapping_kind) { + return Ok(MappingKind::Code(term)); + } + + assert_eq!(raw_mapping_kind & 0b11, 0); + assert_ne!(raw_mapping_kind, 0); + + let (high, is_expansion) = (raw_mapping_kind >> 3, raw_mapping_kind & 0b100 != 0); + if is_expansion { + Ok(MappingKind::Expansion(high)) + } else { + match high { + 0 => unreachable!("zero kind should have already been handled as a code mapping"), + 2 => Ok(MappingKind::Skip), + 4 => { + let r#true = self.read_simple_term()?; + let r#false = self.read_simple_term()?; + Ok(MappingKind::Branch { r#true, r#false }) + } + _ => Err(anyhow!("unknown mapping kind: {raw_mapping_kind:#x}")), + } + } + } + + fn read_raw_mapping_region(&mut self) -> anyhow::Result<MappingRegion> { + let start_line_offset = self.read_uleb128_u32()?; + let start_column = self.read_uleb128_u32()?; + let end_line_offset = self.read_uleb128_u32()?; + let end_column = self.read_uleb128_u32()?; + Ok(MappingRegion { start_line_offset, start_column, end_line_offset, end_column }) + } +} + +/// Enum that can hold a constant zero value, the ID of an physical coverage +/// counter, or the ID (and operation) of a coverage-counter expression. +/// +/// Terms are used as the operands of coverage-counter expressions, as the arms +/// of branch mappings, and as the value of code/gap mappings. +#[derive(Clone, Copy, Debug)] +pub(crate) enum CovTerm { + Zero, + Counter(u32), + Expression(u32, Op), +} + +/// Operator (addition or subtraction) used by an expression. +#[derive(Clone, Copy, Debug)] +pub(crate) enum Op { + Sub, + Add, +} + +impl CovTerm { + pub(crate) fn decode(input: u32) -> Option<Self> { + let (high, tag) = (input >> 2, input & 0b11); + match tag { + 0b00 if high == 0 => Some(Self::Zero), + 0b01 => Some(Self::Counter(high)), + 0b10 => Some(Self::Expression(high, Op::Sub)), + 0b11 => Some(Self::Expression(high, Op::Add)), + // When reading expression operands or branch arms, the LLVM coverage + // mapping reader will always interpret a `0b00` tag as a zero + // term, even when the high bits are non-zero. + // We treat that case as failure instead, so that this code can be + // shared by the full mapping-kind reader as well. + _ => None, + } + } +} + +#[derive(Debug)] +enum MappingKind { + Code(CovTerm), + Gap(CovTerm), + Expansion(u32), + Skip, + // Using raw identifiers here makes the dump output a little bit nicer + // (via the derived Debug), at the expense of making this tool's source + // code a little bit uglier. + Branch { r#true: CovTerm, r#false: CovTerm }, +} + +struct MappingRegion { + /// Offset of this region's start line, relative to the *start line* of + /// the *previous mapping* (or 0). Line numbers are 1-based. + start_line_offset: u32, + /// This region's start column, absolute and 1-based. + start_column: u32, + /// Offset of this region's end line, relative to the *this mapping's* + /// start line. Line numbers are 1-based. + end_line_offset: u32, + /// This region's end column, absolute, 1-based, and exclusive. + /// + /// If the highest bit is set, that bit is cleared and the associated + /// mapping becomes a gap region mapping. + end_column: u32, +} + +impl Debug for MappingRegion { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "(prev + {}, {}) to (start + {}, {})", + self.start_line_offset, self.start_column, self.end_line_offset, self.end_column + ) + } +} + +/// Helper type that prints expressions in a "resolved" form, so that +/// developers reading the dump don't need to resolve expressions by hand. +struct ExpressionResolver { + operands: Vec<(CovTerm, CovTerm)>, +} + +impl ExpressionResolver { + fn new() -> Self { + Self { operands: Vec::new() } + } + + fn push_operands(&mut self, lhs: CovTerm, rhs: CovTerm) { + self.operands.push((lhs, rhs)); + } + + fn format_term(&self, term: CovTerm) -> String { + let mut output = String::new(); + self.write_term(&mut output, term); + output + } + + fn write_term(&self, output: &mut String, term: CovTerm) { + match term { + CovTerm::Zero => output.push_str("Zero"), + CovTerm::Counter(id) => write!(output, "c{id}").unwrap(), + CovTerm::Expression(id, op) => { + let (lhs, rhs) = self.operands[id as usize]; + let op = match op { + Op::Sub => "-", + Op::Add => "+", + }; + + output.push('('); + self.write_term(output, lhs); + write!(output, " {op} ").unwrap(); + self.write_term(output, rhs); + output.push(')'); + } + } + } +} diff --git a/src/tools/coverage-dump/src/main.rs b/src/tools/coverage-dump/src/main.rs new file mode 100644 index 00000000000..93fed1799e0 --- /dev/null +++ b/src/tools/coverage-dump/src/main.rs @@ -0,0 +1,17 @@ +mod covfun; +mod parser; +mod prf_names; + +fn main() -> anyhow::Result<()> { + use anyhow::Context as _; + + let args = std::env::args().collect::<Vec<_>>(); + + let llvm_ir_path = args.get(1).context("LLVM IR file not specified")?; + let llvm_ir = std::fs::read_to_string(llvm_ir_path).context("couldn't read LLVM IR file")?; + + let function_names = crate::prf_names::make_function_names_table(&llvm_ir)?; + crate::covfun::dump_covfun_mappings(&llvm_ir, &function_names)?; + + Ok(()) +} diff --git a/src/tools/coverage-dump/src/parser.rs b/src/tools/coverage-dump/src/parser.rs new file mode 100644 index 00000000000..eefac1a4f94 --- /dev/null +++ b/src/tools/coverage-dump/src/parser.rs @@ -0,0 +1,80 @@ +#[cfg(test)] +mod tests; + +use anyhow::ensure; +use regex::bytes; +use std::sync::OnceLock; + +/// Given the raw contents of a string literal in LLVM IR assembly, decodes any +/// backslash escapes and returns a vector containing the resulting byte string. +pub(crate) fn unescape_llvm_string_contents(contents: &str) -> Vec<u8> { + let escape_re = { + static RE: OnceLock<bytes::Regex> = OnceLock::new(); + // LLVM IR supports two string escapes: `\\` and `\xx`. + RE.get_or_init(|| bytes::Regex::new(r"\\\\|\\([0-9A-Za-z]{2})").unwrap()) + }; + + fn u8_from_hex_digits(digits: &[u8]) -> u8 { + // We know that the input contains exactly 2 hex digits, so these calls + // should never fail. + assert_eq!(digits.len(), 2); + let digits = std::str::from_utf8(digits).unwrap(); + u8::from_str_radix(digits, 16).unwrap() + } + + escape_re + .replace_all(contents.as_bytes(), |captures: &bytes::Captures<'_>| { + let byte = match captures.get(1) { + None => b'\\', + Some(hex_digits) => u8_from_hex_digits(hex_digits.as_bytes()), + }; + [byte] + }) + .into_owned() +} + +pub(crate) struct Parser<'a> { + rest: &'a [u8], +} + +impl<'a> Parser<'a> { + pub(crate) fn new(input: &'a [u8]) -> Self { + Self { rest: input } + } + + pub(crate) fn ensure_empty(self) -> anyhow::Result<()> { + ensure!(self.rest.is_empty(), "unparsed bytes: 0x{:02x?}", self.rest); + Ok(()) + } + + pub(crate) fn read_n_bytes(&mut self, n: usize) -> anyhow::Result<&'a [u8]> { + ensure!(n <= self.rest.len()); + + let (bytes, rest) = self.rest.split_at(n); + self.rest = rest; + Ok(bytes) + } + + pub(crate) fn read_uleb128_u32(&mut self) -> anyhow::Result<u32> { + self.read_uleb128_u64_and_convert() + } + + pub(crate) fn read_uleb128_usize(&mut self) -> anyhow::Result<usize> { + self.read_uleb128_u64_and_convert() + } + + fn read_uleb128_u64_and_convert<T>(&mut self) -> anyhow::Result<T> + where + T: TryFrom<u64> + 'static, + T::Error: std::error::Error + Send + Sync, + { + let mut temp_rest = self.rest; + let raw_value: u64 = leb128::read::unsigned(&mut temp_rest)?; + let converted_value = T::try_from(raw_value)?; + + // Only update `self.rest` if the above steps succeeded, so that the + // parser position can be used for error reporting if desired. + self.rest = temp_rest; + Ok(converted_value) + } +} diff --git a/src/tools/coverage-dump/src/parser/tests.rs b/src/tools/coverage-dump/src/parser/tests.rs new file mode 100644 index 00000000000..a673606b9c4 --- /dev/null +++ b/src/tools/coverage-dump/src/parser/tests.rs @@ -0,0 +1,38 @@ +use super::unescape_llvm_string_contents; + +// WARNING: These tests don't necessarily run in CI, and were mainly used to +// help track down problems when originally developing this tool. +// (The tool is still tested indirectly by snapshot tests that rely on it.) + +// Tests for `unescape_llvm_string_contents`: + +#[test] +fn unescape_empty() { + assert_eq!(unescape_llvm_string_contents(""), &[]); +} + +#[test] +fn unescape_noop() { + let input = "The quick brown fox jumps over the lazy dog."; + assert_eq!(unescape_llvm_string_contents(input), input.as_bytes()); +} + +#[test] +fn unescape_backslash() { + let input = r"\\Hello\\world\\"; + assert_eq!(unescape_llvm_string_contents(input), r"\Hello\world\".as_bytes()); +} + +#[test] +fn unescape_hex() { + let input = r"\01\02\03\04\0a\0b\0C\0D\fd\fE\FF"; + let expected: &[u8] = &[0x01, 0x02, 0x03, 0x04, 0x0a, 0x0b, 0x0c, 0x0d, 0xfd, 0xfe, 0xff]; + assert_eq!(unescape_llvm_string_contents(input), expected); +} + +#[test] +fn unescape_mixed() { + let input = r"\\01.\5c\5c"; + let expected: &[u8] = br"\01.\\"; + assert_eq!(unescape_llvm_string_contents(input), expected); +} diff --git a/src/tools/coverage-dump/src/prf_names.rs b/src/tools/coverage-dump/src/prf_names.rs new file mode 100644 index 00000000000..d3f7b819e48 --- /dev/null +++ b/src/tools/coverage-dump/src/prf_names.rs @@ -0,0 +1,87 @@ +use crate::parser::{unescape_llvm_string_contents, Parser}; +use anyhow::{anyhow, ensure}; +use regex::Regex; +use std::collections::HashMap; +use std::sync::OnceLock; + +/// Scans through the contents of an LLVM IR assembly file to find `__llvm_prf_names` +/// entries, decodes them, and creates a table that maps name hash values to +/// (demangled) function names. +pub(crate) fn make_function_names_table(llvm_ir: &str) -> anyhow::Result<HashMap<u64, String>> { + fn prf_names_payload(line: &str) -> Option<&str> { + let re = { + // We cheat a little bit and match the variable name `@__llvm_prf_nm` + // rather than the section name, because the section name is harder + // to extract and differs across Linux/Windows/macOS. + static RE: OnceLock<Regex> = OnceLock::new(); + RE.get_or_init(|| { + Regex::new(r#"^@__llvm_prf_nm =.*\[[0-9]+ x i8\] c"([^"]*)".*$"#).unwrap() + }) + }; + + let payload = re.captures(line)?.get(1).unwrap().as_str(); + Some(payload) + } + + /// LLVM's profiler/coverage metadata often uses an MD5 hash truncated to + /// 64 bits as a way to associate data stored in different tables/sections. + fn truncated_md5(bytes: &[u8]) -> u64 { + use md5::{Digest, Md5}; + let mut hasher = Md5::new(); + hasher.update(bytes); + let hash: [u8; 8] = hasher.finalize().as_slice()[..8].try_into().unwrap(); + // The truncated hash is explicitly little-endian, regardless of host + // or target platform. (See `MD5Result::low` in LLVM's `MD5.h`.) + u64::from_le_bytes(hash) + } + + fn demangle_if_able(symbol_name_bytes: &[u8]) -> anyhow::Result<String> { + // In practice, raw symbol names should always be ASCII. + let symbol_name_str = std::str::from_utf8(symbol_name_bytes)?; + match rustc_demangle::try_demangle(symbol_name_str) { + Ok(d) => Ok(format!("{d:#}")), + // If demangling failed, don't treat it as an error. This lets us + // run the dump tool against non-Rust coverage maps produced by + // `clang`, for testing purposes. + Err(_) => Ok(format!("(couldn't demangle) {symbol_name_str}")), + } + } + + let mut map = HashMap::new(); + + for payload in llvm_ir.lines().filter_map(prf_names_payload).map(unescape_llvm_string_contents) + { + let mut parser = Parser::new(&payload); + let uncompressed_len = parser.read_uleb128_usize()?; + let compressed_len = parser.read_uleb128_usize()?; + + let uncompressed_bytes_vec; + let uncompressed_bytes: &[u8] = if compressed_len == 0 { + // The symbol name bytes are uncompressed, so read them directly. + parser.read_n_bytes(uncompressed_len)? + } else { + // The symbol name bytes are compressed, so read and decompress them. + let compressed_bytes = parser.read_n_bytes(compressed_len)?; + + uncompressed_bytes_vec = miniz_oxide::inflate::decompress_to_vec_zlib_with_limit( + compressed_bytes, + uncompressed_len, + ) + .map_err(|e| anyhow!("{e:?}"))?; + ensure!(uncompressed_bytes_vec.len() == uncompressed_len); + + &uncompressed_bytes_vec + }; + + // Symbol names in the payload are separated by `0x01` bytes. + for raw_name in uncompressed_bytes.split(|&b| b == 0x01) { + let hash = truncated_md5(raw_name); + let demangled = demangle_if_able(raw_name)?; + map.insert(hash, demangled); + } + + parser.ensure_empty()?; + } + + Ok(map) +} |
