coverage-dump: Extract some common code to an `llvm_utils` submodule

author: Zalathar <Zalathar@users.noreply.github.com> 2025-04-23 17:36:01 +1000
committer: Zalathar <Zalathar@users.noreply.github.com> 2025-05-06 11:58:50 +1000
commit: 89319f2e128379a96943de2be347ec09a80abca4 (patch)
tree: a0b14b15a489c8dab444119211b9a9d753c9dc3a /src
parent: a3d5562fcf7f67491d55588ac3279bfade9dadcd (diff)
download: rust-89319f2e128379a96943de2be347ec09a80abca4.tar.gz
rust-89319f2e128379a96943de2be347ec09a80abca4.zip
6 files changed, 51 insertions, 48 deletions
diff --git a/src/tools/coverage-dump/src/covfun.rs b/src/tools/coverage-dump/src/covfun.rs
index 82ebd33d0d1..458fd680429 100644
--- a/src/tools/coverage-dump/src/covfun.rs
+++ b/src/tools/coverage-dump/src/covfun.rs
@@ -5,7 +5,8 @@ use std::sync::OnceLock;
 use anyhow::{Context, anyhow};
 use regex::Regex;
 
-use crate::parser::{Parser, unescape_llvm_string_contents};
+use crate::llvm_utils::unescape_llvm_string_contents;
+use crate::parser::Parser;
 
 pub(crate) fn dump_covfun_mappings(
     llvm_ir: &str,
diff --git a/src/tools/coverage-dump/src/llvm_utils.rs b/src/tools/coverage-dump/src/llvm_utils.rs
new file mode 100644
index 00000000000..017fdbec0fc
--- /dev/null
+++ b/src/tools/coverage-dump/src/llvm_utils.rs
@@ -0,0 +1,46 @@
+use std::sync::OnceLock;
+
+use regex::bytes;
+
+#[cfg(test)]
+mod tests;
+
+/// Given the raw contents of a string literal in LLVM IR assembly, decodes any
+/// backslash escapes and returns a vector containing the resulting byte string.
+pub(crate) fn unescape_llvm_string_contents(contents: &str) -> Vec<u8> {
+    let escape_re = {
+        static RE: OnceLock<bytes::Regex> = OnceLock::new();
+        // LLVM IR supports two string escapes: `\\` and `\xx`.
+        RE.get_or_init(|| bytes::Regex::new(r"\\\\|\\([0-9A-Za-z]{2})").unwrap())
+    };
+
+    fn u8_from_hex_digits(digits: &[u8]) -> u8 {
+        // We know that the input contains exactly 2 hex digits, so these calls
+        // should never fail.
+        assert_eq!(digits.len(), 2);
+        let digits = std::str::from_utf8(digits).unwrap();
+        u8::from_str_radix(digits, 16).unwrap()
+    }
+
+    escape_re
+        .replace_all(contents.as_bytes(), |captures: &bytes::Captures<'_>| {
+            let byte = match captures.get(1) {
+                None => b'\\',
+                Some(hex_digits) => u8_from_hex_digits(hex_digits.as_bytes()),
+            };
+            [byte]
+        })
+        .into_owned()
+}
+
+/// LLVM's profiler/coverage metadata often uses an MD5 hash truncated to
+/// 64 bits as a way to associate data stored in different tables/sections.
+pub(crate) fn truncated_md5(bytes: &[u8]) -> u64 {
+    use md5::{Digest, Md5};
+    let mut hasher = Md5::new();
+    hasher.update(bytes);
+    let hash: [u8; 8] = hasher.finalize().as_slice()[..8].try_into().unwrap();
+    // The truncated hash is explicitly little-endian, regardless of host
+    // or target platform. (See `MD5Result::low` in LLVM's `MD5.h`.)
+    u64::from_le_bytes(hash)
+}
diff --git a/src/tools/coverage-dump/src/parser/tests.rs b/src/tools/coverage-dump/src/llvm_utils/tests.rs
index 506b0a6200b..506b0a6200b 100644
--- a/src/tools/coverage-dump/src/parser/tests.rs
+++ b/src/tools/coverage-dump/src/llvm_utils/tests.rs
diff --git a/src/tools/coverage-dump/src/main.rs b/src/tools/coverage-dump/src/main.rs
index b21e3e292f2..6408b97a06f 100644
--- a/src/tools/coverage-dump/src/main.rs
+++ b/src/tools/coverage-dump/src/main.rs
@@ -1,4 +1,5 @@
 mod covfun;
+mod llvm_utils;
 mod parser;
 mod prf_names;
 
diff --git a/src/tools/coverage-dump/src/parser.rs b/src/tools/coverage-dump/src/parser.rs
index 0bd4abdae3e..f26a57b43b3 100644
--- a/src/tools/coverage-dump/src/parser.rs
+++ b/src/tools/coverage-dump/src/parser.rs
@@ -1,38 +1,4 @@
-#[cfg(test)]
-mod tests;
-
-use std::sync::OnceLock;
-
 use anyhow::ensure;
-use regex::bytes;
-
-/// Given the raw contents of a string literal in LLVM IR assembly, decodes any
-/// backslash escapes and returns a vector containing the resulting byte string.
-pub(crate) fn unescape_llvm_string_contents(contents: &str) -> Vec<u8> {
-    let escape_re = {
-        static RE: OnceLock<bytes::Regex> = OnceLock::new();
-        // LLVM IR supports two string escapes: `\\` and `\xx`.
-        RE.get_or_init(|| bytes::Regex::new(r"\\\\|\\([0-9A-Za-z]{2})").unwrap())
-    };
-
-    fn u8_from_hex_digits(digits: &[u8]) -> u8 {
-        // We know that the input contains exactly 2 hex digits, so these calls
-        // should never fail.
-        assert_eq!(digits.len(), 2);
-        let digits = std::str::from_utf8(digits).unwrap();
-        u8::from_str_radix(digits, 16).unwrap()
-    }
-
-    escape_re
-        .replace_all(contents.as_bytes(), |captures: &bytes::Captures<'_>| {
-            let byte = match captures.get(1) {
-                None => b'\\',
-                Some(hex_digits) => u8_from_hex_digits(hex_digits.as_bytes()),
-            };
-            [byte]
-        })
-        .into_owned()
-}
 
 pub(crate) struct Parser<'a> {
     rest: &'a [u8],
diff --git a/src/tools/coverage-dump/src/prf_names.rs b/src/tools/coverage-dump/src/prf_names.rs
index 96d097c79a3..fe193efd8e5 100644
--- a/src/tools/coverage-dump/src/prf_names.rs
+++ b/src/tools/coverage-dump/src/prf_names.rs
@@ -4,7 +4,8 @@ use std::sync::OnceLock;
 use anyhow::{anyhow, ensure};
 use regex::Regex;
 
-use crate::parser::{Parser, unescape_llvm_string_contents};
+use crate::llvm_utils::{truncated_md5, unescape_llvm_string_contents};
+use crate::parser::Parser;
 
 /// Scans through the contents of an LLVM IR assembly file to find `__llvm_prf_names`
 /// entries, decodes them, and creates a table that maps name hash values to
@@ -25,18 +26,6 @@ pub(crate) fn make_function_names_table(llvm_ir: &str) -> anyhow::Result<HashMap
         Some(payload)
     }
 
-    /// LLVM's profiler/coverage metadata often uses an MD5 hash truncated to
-    /// 64 bits as a way to associate data stored in different tables/sections.
-    fn truncated_md5(bytes: &[u8]) -> u64 {
-        use md5::{Digest, Md5};
-        let mut hasher = Md5::new();
-        hasher.update(bytes);
-        let hash: [u8; 8] = hasher.finalize().as_slice()[..8].try_into().unwrap();
-        // The truncated hash is explicitly little-endian, regardless of host
-        // or target platform. (See `MD5Result::low` in LLVM's `MD5.h`.)
-        u64::from_le_bytes(hash)
-    }
-
     fn demangle_if_able(symbol_name_bytes: &[u8]) -> anyhow::Result<String> {
         // In practice, raw symbol names should always be ASCII.
         let symbol_name_str = std::str::from_utf8(symbol_name_bytes)?;
author	Zalathar <Zalathar@users.noreply.github.com>	2025-04-23 17:36:01 +1000
committer	Zalathar <Zalathar@users.noreply.github.com>	2025-05-06 11:58:50 +1000
commit	89319f2e128379a96943de2be347ec09a80abca4 (patch)
tree	a0b14b15a489c8dab444119211b9a9d753c9dc3a /src
parent	a3d5562fcf7f67491d55588ac3279bfade9dadcd (diff)
download	rust-89319f2e128379a96943de2be347ec09a80abca4.tar.gz rust-89319f2e128379a96943de2be347ec09a80abca4.zip