diff options
| author | Zalathar <Zalathar@users.noreply.github.com> | 2025-04-22 21:49:57 +1000 |
|---|---|---|
| committer | Zalathar <Zalathar@users.noreply.github.com> | 2025-05-06 11:58:58 +1000 |
| commit | f1b8cd433f31af7e25fb8b7ec7436d6ebf3f5410 (patch) | |
| tree | 78e3a5b2bc6b411d4dbdd5684277c6708915a895 | |
| parent | bc3f0e326a5ef77bb4e3531db5190109d8e2420b (diff) | |
| download | rust-f1b8cd433f31af7e25fb8b7ec7436d6ebf3f5410.tar.gz rust-f1b8cd433f31af7e25fb8b7ec7436d6ebf3f5410.zip | |
coverage-dump: Include filenames hash in covfun line data
| -rw-r--r-- | Cargo.lock | 1 | ||||
| -rw-r--r-- | src/tools/coverage-dump/Cargo.toml | 1 | ||||
| -rw-r--r-- | src/tools/coverage-dump/src/covfun.rs | 73 | ||||
| -rw-r--r-- | src/tools/coverage-dump/src/covfun/tests.rs | 53 |
4 files changed, 102 insertions, 26 deletions
diff --git a/Cargo.lock b/Cargo.lock index bbd3f33d7bd..b5fc86f50fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -777,6 +777,7 @@ name = "coverage-dump" version = "0.1.0" dependencies = [ "anyhow", + "itertools", "leb128", "md-5", "miniz_oxide 0.7.4", diff --git a/src/tools/coverage-dump/Cargo.toml b/src/tools/coverage-dump/Cargo.toml index 7f14286b5d0..6f92ac50d96 100644 --- a/src/tools/coverage-dump/Cargo.toml +++ b/src/tools/coverage-dump/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] anyhow = "1.0.71" +itertools = "0.12" leb128 = "0.2.5" md5 = { package = "md-5" , version = "0.10.5" } miniz_oxide = "0.7.1" diff --git a/src/tools/coverage-dump/src/covfun.rs b/src/tools/coverage-dump/src/covfun.rs index 458fd680429..4c0ce205822 100644 --- a/src/tools/coverage-dump/src/covfun.rs +++ b/src/tools/coverage-dump/src/covfun.rs @@ -1,13 +1,17 @@ use std::collections::HashMap; use std::fmt::{self, Debug, Write as _}; -use std::sync::OnceLock; +use std::sync::LazyLock; -use anyhow::{Context, anyhow}; +use anyhow::{Context, anyhow, ensure}; +use itertools::Itertools; use regex::Regex; use crate::llvm_utils::unescape_llvm_string_contents; use crate::parser::Parser; +#[cfg(test)] +mod tests; + pub(crate) fn dump_covfun_mappings( llvm_ir: &str, function_names: &HashMap<u64, String>, @@ -16,9 +20,12 @@ pub(crate) fn dump_covfun_mappings( // each entry with its (demangled) name. let mut covfun_entries = llvm_ir .lines() - .filter_map(covfun_line_data) - .map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data)) - .collect::<Vec<_>>(); + .filter(|line| is_covfun_line(line)) + .map(parse_covfun_line) + .map_ok(|line_data| { + (function_names.get(&line_data.name_hash).map(String::as_str), line_data) + }) + .collect::<Result<Vec<_>, _>>()?; covfun_entries.sort_by(|a, b| { // Sort entries primarily by name, to help make the order consistent // across platforms and relatively insensitive to changes. @@ -108,36 +115,50 @@ pub(crate) fn dump_covfun_mappings( Ok(()) } +#[derive(Debug, PartialEq, Eq)] struct CovfunLineData { - name_hash: u64, is_used: bool, + name_hash: u64, + filenames_hash: u64, payload: Vec<u8>, } -/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun` -/// entry, and if so extracts relevant data in a `CovfunLineData`. -fn covfun_line_data(line: &str) -> Option<CovfunLineData> { - let re = { - // We cheat a little bit and match variable names `@__covrec_[HASH]u` - // rather than the section name, because the section name is harder to - // extract and differs across Linux/Windows/macOS. We also extract the - // symbol name hash from the variable name rather than the data, since - // it's easier and both should match. - static RE: OnceLock<Regex> = OnceLock::new(); - RE.get_or_init(|| { - Regex::new( - r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#, - ) - .unwrap() - }) - }; +fn is_covfun_line(line: &str) -> bool { + line.starts_with("@__covrec_") +} - let captures = re.captures(line)?; - let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap(); +/// Given a line of LLVM IR assembly that should contain an `__llvm_covfun` +/// entry, parses it to extract relevant data in a `CovfunLineData`. +fn parse_covfun_line(line: &str) -> anyhow::Result<CovfunLineData> { + ensure!(is_covfun_line(line)); + + // We cheat a little bit and match variable names `@__covrec_[HASH]u` + // rather than the section name, because the section name is harder to + // extract and differs across Linux/Windows/macOS. + const RE_STRING: &str = r#"(?x)^ + @__covrec_[0-9A-Z]+(?<is_used>u)? + \ = \ # (trailing space) + .* + <\{ + \ i64 \ (?<name_hash> -? [0-9]+), + \ i32 \ -? [0-9]+, # (length of payload; currently unused) + \ i64 \ -? [0-9]+, # (source hash; currently unused) + \ i64 \ (?<filenames_hash> -? [0-9]+), + \ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)" + \ # (trailing space) + }> + .*$ + "#; + static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(RE_STRING).unwrap()); + + let captures = + RE.captures(line).with_context(|| format!("couldn't parse covfun line: {line:?}"))?; let is_used = captures.name("is_used").is_some(); + let name_hash = i64::from_str_radix(&captures["name_hash"], 10).unwrap() as u64; + let filenames_hash = i64::from_str_radix(&captures["filenames_hash"], 10).unwrap() as u64; let payload = unescape_llvm_string_contents(&captures["payload"]); - Some(CovfunLineData { name_hash, is_used, payload }) + Ok(CovfunLineData { is_used, name_hash, filenames_hash, payload }) } // Extra parser methods only needed when parsing `covfun` payloads. diff --git a/src/tools/coverage-dump/src/covfun/tests.rs b/src/tools/coverage-dump/src/covfun/tests.rs new file mode 100644 index 00000000000..1ce833784bd --- /dev/null +++ b/src/tools/coverage-dump/src/covfun/tests.rs @@ -0,0 +1,53 @@ +use super::{CovfunLineData, parse_covfun_line}; + +/// Integers in LLVM IR are not inherently signed/unsigned, and the text format tends +/// to emit them in signed form, so this helper function converts `i64` to `u64`. +fn as_u64(x: i64) -> u64 { + x as u64 +} + +#[test] +fn parse_covfun_line_data() { + struct Case { + line: &'static str, + expected: CovfunLineData, + } + let cases = &[ + // Copied from `trivial.ll`: + Case { + line: r#"@__covrec_49A9BAAE5F896E81u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 5307978893922758273, i32 9, i64 445092354169400020, i64 6343436898695299756, [9 x i8] c"\01\01\00\01\01\03\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#, + expected: CovfunLineData { + is_used: true, + name_hash: as_u64(5307978893922758273), + filenames_hash: as_u64(6343436898695299756), + payload: b"\x01\x01\x00\x01\x01\x03\x01\x00\x0D".to_vec(), + }, + }, + // Copied from `on-off-sandwich.ll`: + Case { + line: r#"@__covrec_D0CE53C5E64F319Au = linkonce_odr hidden constant <{ i64, i32, i64, i64, [14 x i8] }> <{ i64 -3400688559180533350, i32 14, i64 7307957714577672185, i64 892196767019953100, [14 x i8] c"\01\01\00\02\01\10\05\02\10\01\07\05\00\06" }>, section "__LLVM_COV,__llvm_covfun", align 8"#, + expected: CovfunLineData { + is_used: true, + name_hash: as_u64(-3400688559180533350), + filenames_hash: as_u64(892196767019953100), + payload: b"\x01\x01\x00\x02\x01\x10\x05\x02\x10\x01\x07\x05\x00\x06".to_vec(), + }, + }, + // Copied from `no-core.ll`: + Case { + line: r#"@__covrec_F8016FC82D46106u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 1116917981370409222, i32 9, i64 -8857254680411629915, i64 -3625186110715410276, [9 x i8] c"\01\01\00\01\01\0C\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#, + expected: CovfunLineData { + is_used: true, + name_hash: as_u64(1116917981370409222), + filenames_hash: as_u64(-3625186110715410276), + payload: b"\x01\x01\x00\x01\x01\x0C\x01\x00\x0D".to_vec(), + }, + }, + ]; + + for &Case { line, ref expected } in cases { + println!("- {line}"); + let line_data = parse_covfun_line(line).map_err(|e| e.to_string()); + assert_eq!(line_data.as_ref(), Ok(expected)); + } +} |
