coverage-dump: Include filenames hash in covfun line data

author: Zalathar <Zalathar@users.noreply.github.com> 2025-04-22 21:49:57 +1000
committer: Zalathar <Zalathar@users.noreply.github.com> 2025-05-06 11:58:58 +1000
commit: f1b8cd433f31af7e25fb8b7ec7436d6ebf3f5410 (patch)
tree: 78e3a5b2bc6b411d4dbdd5684277c6708915a895
parent: bc3f0e326a5ef77bb4e3531db5190109d8e2420b (diff)
download: rust-f1b8cd433f31af7e25fb8b7ec7436d6ebf3f5410.tar.gz
rust-f1b8cd433f31af7e25fb8b7ec7436d6ebf3f5410.zip
4 files changed, 102 insertions, 26 deletions
diff --git a/Cargo.lock b/Cargo.lock
index bbd3f33d7bd..b5fc86f50fe 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -777,6 +777,7 @@ name = "coverage-dump"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "itertools",
  "leb128",
  "md-5",
  "miniz_oxide 0.7.4",
diff --git a/src/tools/coverage-dump/Cargo.toml b/src/tools/coverage-dump/Cargo.toml
index 7f14286b5d0..6f92ac50d96 100644
--- a/src/tools/coverage-dump/Cargo.toml
+++ b/src/tools/coverage-dump/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 
 [dependencies]
 anyhow = "1.0.71"
+itertools = "0.12"
 leb128 = "0.2.5"
 md5 = { package = "md-5" , version = "0.10.5" }
 miniz_oxide = "0.7.1"
diff --git a/src/tools/coverage-dump/src/covfun.rs b/src/tools/coverage-dump/src/covfun.rs
index 458fd680429..4c0ce205822 100644
--- a/src/tools/coverage-dump/src/covfun.rs
+++ b/src/tools/coverage-dump/src/covfun.rs
@@ -1,13 +1,17 @@
 use std::collections::HashMap;
 use std::fmt::{self, Debug, Write as _};
-use std::sync::OnceLock;
+use std::sync::LazyLock;
 
-use anyhow::{Context, anyhow};
+use anyhow::{Context, anyhow, ensure};
+use itertools::Itertools;
 use regex::Regex;
 
 use crate::llvm_utils::unescape_llvm_string_contents;
 use crate::parser::Parser;
 
+#[cfg(test)]
+mod tests;
+
 pub(crate) fn dump_covfun_mappings(
     llvm_ir: &str,
     function_names: &HashMap<u64, String>,
@@ -16,9 +20,12 @@ pub(crate) fn dump_covfun_mappings(
     // each entry with its (demangled) name.
     let mut covfun_entries = llvm_ir
         .lines()
-        .filter_map(covfun_line_data)
-        .map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
-        .collect::<Vec<_>>();
+        .filter(|line| is_covfun_line(line))
+        .map(parse_covfun_line)
+        .map_ok(|line_data| {
+            (function_names.get(&line_data.name_hash).map(String::as_str), line_data)
+        })
+        .collect::<Result<Vec<_>, _>>()?;
     covfun_entries.sort_by(|a, b| {
         // Sort entries primarily by name, to help make the order consistent
         // across platforms and relatively insensitive to changes.
@@ -108,36 +115,50 @@ pub(crate) fn dump_covfun_mappings(
     Ok(())
 }
 
+#[derive(Debug, PartialEq, Eq)]
 struct CovfunLineData {
-    name_hash: u64,
     is_used: bool,
+    name_hash: u64,
+    filenames_hash: u64,
     payload: Vec<u8>,
 }
 
-/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
-/// entry, and if so extracts relevant data in a `CovfunLineData`.
-fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
-    let re = {
-        // We cheat a little bit and match variable names `@__covrec_[HASH]u`
-        // rather than the section name, because the section name is harder to
-        // extract and differs across Linux/Windows/macOS. We also extract the
-        // symbol name hash from the variable name rather than the data, since
-        // it's easier and both should match.
-        static RE: OnceLock<Regex> = OnceLock::new();
-        RE.get_or_init(|| {
-            Regex::new(
-                r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
-            )
-            .unwrap()
-        })
-    };
+fn is_covfun_line(line: &str) -> bool {
+    line.starts_with("@__covrec_")
+}
 
-    let captures = re.captures(line)?;
-    let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();
+/// Given a line of LLVM IR assembly that should contain an `__llvm_covfun`
+/// entry, parses it to extract relevant data in a `CovfunLineData`.
+fn parse_covfun_line(line: &str) -> anyhow::Result<CovfunLineData> {
+    ensure!(is_covfun_line(line));
+
+    // We cheat a little bit and match variable names `@__covrec_[HASH]u`
+    // rather than the section name, because the section name is harder to
+    // extract and differs across Linux/Windows/macOS.
+    const RE_STRING: &str = r#"(?x)^
+        @__covrec_[0-9A-Z]+(?<is_used>u)?
+        \ = \ # (trailing space)
+        .*
+        <\{
+            \ i64 \ (?<name_hash> -? [0-9]+),
+            \ i32 \ -? [0-9]+, # (length of payload; currently unused)
+            \ i64 \ -? [0-9]+, # (source hash; currently unused)
+            \ i64 \ (?<filenames_hash> -? [0-9]+),
+            \ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
+            \ # (trailing space)
+        }>
+        .*$
+    "#;
+    static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(RE_STRING).unwrap());
+
+    let captures =
+        RE.captures(line).with_context(|| format!("couldn't parse covfun line: {line:?}"))?;
     let is_used = captures.name("is_used").is_some();
+    let name_hash = i64::from_str_radix(&captures["name_hash"], 10).unwrap() as u64;
+    let filenames_hash = i64::from_str_radix(&captures["filenames_hash"], 10).unwrap() as u64;
     let payload = unescape_llvm_string_contents(&captures["payload"]);
 
-    Some(CovfunLineData { name_hash, is_used, payload })
+    Ok(CovfunLineData { is_used, name_hash, filenames_hash, payload })
 }
 
 // Extra parser methods only needed when parsing `covfun` payloads.
diff --git a/src/tools/coverage-dump/src/covfun/tests.rs b/src/tools/coverage-dump/src/covfun/tests.rs
new file mode 100644
index 00000000000..1ce833784bd
--- /dev/null
+++ b/src/tools/coverage-dump/src/covfun/tests.rs
@@ -0,0 +1,53 @@
+use super::{CovfunLineData, parse_covfun_line};
+
+/// Integers in LLVM IR are not inherently signed/unsigned, and the text format tends
+/// to emit them in signed form, so this helper reinterprets an `i64`'s bits as a `u64`.
+fn as_u64(x: i64) -> u64 {
+    x as u64
+}
+
+#[test]
+fn parse_covfun_line_data() {
+    struct Case {
+        line: &'static str,
+        expected: CovfunLineData,
+    }
+    let cases = &[
+        // Copied from `trivial.ll`:
+        Case {
+            line: r#"@__covrec_49A9BAAE5F896E81u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 5307978893922758273, i32 9, i64 445092354169400020, i64 6343436898695299756, [9 x i8] c"\01\01\00\01\01\03\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
+            expected: CovfunLineData {
+                is_used: true,
+                name_hash: as_u64(5307978893922758273),
+                filenames_hash: as_u64(6343436898695299756),
+                payload: b"\x01\x01\x00\x01\x01\x03\x01\x00\x0D".to_vec(),
+            },
+        },
+        // Copied from `on-off-sandwich.ll`:
+        Case {
+            line: r#"@__covrec_D0CE53C5E64F319Au = linkonce_odr hidden constant <{ i64, i32, i64, i64, [14 x i8] }> <{ i64 -3400688559180533350, i32 14, i64 7307957714577672185, i64 892196767019953100, [14 x i8] c"\01\01\00\02\01\10\05\02\10\01\07\05\00\06" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
+            expected: CovfunLineData {
+                is_used: true,
+                name_hash: as_u64(-3400688559180533350),
+                filenames_hash: as_u64(892196767019953100),
+                payload: b"\x01\x01\x00\x02\x01\x10\x05\x02\x10\x01\x07\x05\x00\x06".to_vec(),
+            },
+        },
+        // Copied from `no-core.ll`:
+        Case {
+            line: r#"@__covrec_F8016FC82D46106u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 1116917981370409222, i32 9, i64 -8857254680411629915, i64 -3625186110715410276, [9 x i8] c"\01\01\00\01\01\0C\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
+            expected: CovfunLineData {
+                is_used: true,
+                name_hash: as_u64(1116917981370409222),
+                filenames_hash: as_u64(-3625186110715410276),
+                payload: b"\x01\x01\x00\x01\x01\x0C\x01\x00\x0D".to_vec(),
+            },
+        },
+    ];
+
+    for &Case { line, ref expected } in cases {
+        println!("- {line}");
+        let line_data = parse_covfun_line(line).map_err(|e| e.to_string());
+        assert_eq!(line_data.as_ref(), Ok(expected));
+    }
+}
author	Zalathar <Zalathar@users.noreply.github.com>	2025-04-22 21:49:57 +1000
committer	Zalathar <Zalathar@users.noreply.github.com>	2025-05-06 11:58:58 +1000
commit	f1b8cd433f31af7e25fb8b7ec7436d6ebf3f5410 (patch)
tree	78e3a5b2bc6b411d4dbdd5684277c6708915a895
parent	bc3f0e326a5ef77bb4e3531db5190109d8e2420b (diff)
download	rust-f1b8cd433f31af7e25fb8b7ec7436d6ebf3f5410.tar.gz rust-f1b8cd433f31af7e25fb8b7ec7436d6ebf3f5410.zip