about summary refs log tree commit diff
path: root/library/compiler-builtins/crates/symbol-check/src/main.rs
diff options
context:
space:
mode:
Diffstat (limited to 'library/compiler-builtins/crates/symbol-check/src/main.rs')
-rw-r--r--library/compiler-builtins/crates/symbol-check/src/main.rs231
1 files changed, 231 insertions, 0 deletions
diff --git a/library/compiler-builtins/crates/symbol-check/src/main.rs b/library/compiler-builtins/crates/symbol-check/src/main.rs
new file mode 100644
index 00000000000..104505438cc
--- /dev/null
+++ b/library/compiler-builtins/crates/symbol-check/src/main.rs
@@ -0,0 +1,231 @@
+//! Tool used by CI to inspect compiler-builtins archives and help ensure we won't run into any
+//! linking errors.
+
+use std::collections::{BTreeMap, BTreeSet};
+use std::fs;
+use std::io::{BufRead, BufReader};
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+
+use object::read::archive::{ArchiveFile, ArchiveMember};
+use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection};
+use serde_json::Value;
+
+const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"];
+const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe"), None];
+
+const USAGE: &str = "Usage:
+
+    symbol-check build-and-check CARGO_ARGS ...
+
+Cargo will get invoked with `CARGO_ARGS` and all output
+`compiler_builtins*.rlib` files will be checked.
+";
+
+fn main() {
+    // Create a `&str` vec so we can match on it.
+    let args = std::env::args().collect::<Vec<_>>();
+    let args_ref = args.iter().map(String::as_str).collect::<Vec<_>>();
+
+    match &args_ref[1..] {
+        ["build-and-check", rest @ ..] if !rest.is_empty() => {
+            let paths = exec_cargo_with_args(rest);
+            for path in paths {
+                println!("Checking {}", path.display());
+                verify_no_duplicates(&path);
+                verify_core_symbols(&path);
+            }
+        }
+        _ => {
+            println!("{USAGE}");
+            std::process::exit(1);
+        }
+    }
+}
+
+/// Run `cargo build` with the provided additional arguments, collecting the list of created
+/// libraries.
+fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
+    let mut cmd = Command::new("cargo")
+        .arg("build")
+        .arg("--message-format=json")
+        .args(args)
+        .stdout(Stdio::piped())
+        .spawn()
+        .expect("failed to launch Cargo");
+
+    let stdout = cmd.stdout.take().unwrap();
+    let reader = BufReader::new(stdout);
+    let mut check_files = Vec::new();
+
+    for line in reader.lines() {
+        let line = line.expect("failed to read line");
+        println!("{line}"); // tee to stdout
+
+        // Select only steps that create files
+        let j: Value = serde_json::from_str(&line).expect("failed to deserialize");
+        if j["reason"] != "compiler-artifact" {
+            continue;
+        }
+
+        // Find rlibs in the created file list that match our expected library names and
+        // extensions.
+        for fpath in j["filenames"].as_array().expect("filenames not an array") {
+            let path = fpath.as_str().expect("file name not a string");
+            let path = PathBuf::from(path);
+
+            if CHECK_EXTENSIONS.contains(&path.extension().map(|ex| ex.to_str().unwrap())) {
+                let fname = path.file_name().unwrap().to_str().unwrap();
+
+                if CHECK_LIBRARIES.iter().any(|lib| fname.contains(lib)) {
+                    check_files.push(path);
+                }
+            }
+        }
+    }
+
+    cmd.wait().expect("failed to wait on Cargo");
+
+    assert!(!check_files.is_empty(), "no compiler_builtins rlibs found");
+    println!("Collected the following rlibs to check: {check_files:#?}");
+
+    check_files
+}
+
+/// Information collected from `object`, for convenience.
+#[expect(unused)] // only for printing
+#[derive(Clone, Debug)]
+struct SymInfo {
+    name: String,
+    kind: SymbolKind,
+    scope: SymbolScope,
+    section: SymbolSection,
+    is_undefined: bool,
+    is_global: bool,
+    is_local: bool,
+    is_weak: bool,
+    is_common: bool,
+    address: u64,
+    object: String,
+}
+
+impl SymInfo {
+    fn new(sym: &Symbol, member: &ArchiveMember) -> Self {
+        Self {
+            name: sym.name().expect("missing name").to_owned(),
+            kind: sym.kind(),
+            scope: sym.scope(),
+            section: sym.section(),
+            is_undefined: sym.is_undefined(),
+            is_global: sym.is_global(),
+            is_local: sym.is_local(),
+            is_weak: sym.is_weak(),
+            is_common: sym.is_common(),
+            address: sym.address(),
+            object: String::from_utf8_lossy(member.name()).into_owned(),
+        }
+    }
+}
+
+/// Ensure that the same global symbol isn't defined in multiple object files within an archive.
+///
+/// Note that this will also locate cases where a symbol is weakly defined in more than one place.
+/// Technically there are no linker errors that will come from this, but it keeps our binary more
+/// straightforward and saves some distribution size.
+fn verify_no_duplicates(path: &Path) {
+    let mut syms = BTreeMap::<String, SymInfo>::new();
+    let mut dups = Vec::new();
+    let mut found_any = false;
+
+    for_each_symbol(path, |symbol, member| {
+        // Only check defined globals
+        if !symbol.is_global() || symbol.is_undefined() {
+            return;
+        }
+
+        let sym = SymInfo::new(&symbol, member);
+
+        // x86-32 includes multiple copies of thunk symbols
+        if sym.name.starts_with("__x86.get_pc_thunk") {
+            return;
+        }
+
+        // Windows has symbols for literal numeric constants, string literals, and MinGW pseudo-
+        // relocations. These are allowed to have repeated definitions.
+        let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"];
+        if win_allowed_dup_pfx
+            .iter()
+            .any(|pfx| sym.name.starts_with(pfx))
+        {
+            return;
+        }
+
+        match syms.get(&sym.name) {
+            Some(existing) => {
+                dups.push(sym);
+                dups.push(existing.clone());
+            }
+            None => {
+                syms.insert(sym.name.clone(), sym);
+            }
+        }
+
+        found_any = true;
+    });
+
+    assert!(found_any, "no symbols found");
+
+    if !dups.is_empty() {
+        dups.sort_unstable_by(|a, b| a.name.cmp(&b.name));
+        panic!("found duplicate symbols: {dups:#?}");
+    }
+
+    println!("    success: no duplicate symbols found");
+}
+
+/// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined.
+fn verify_core_symbols(path: &Path) {
+    let mut defined = BTreeSet::new();
+    let mut undefined = Vec::new();
+    let mut has_symbols = false;
+
+    for_each_symbol(path, |symbol, member| {
+        has_symbols = true;
+
+        // Find only symbols from `core`
+        if !symbol.name().unwrap().contains("_ZN4core") {
+            return;
+        }
+
+        let sym = SymInfo::new(&symbol, member);
+        if sym.is_undefined {
+            undefined.push(sym);
+        } else {
+            defined.insert(sym.name);
+        }
+    });
+
+    assert!(has_symbols, "no symbols found");
+
+    // Discard any symbols that are defined somewhere in the archive
+    undefined.retain(|sym| !defined.contains(&sym.name));
+
+    if !undefined.is_empty() {
+        undefined.sort_unstable_by(|a, b| a.name.cmp(&b.name));
+        panic!("found undefined symbols from core: {undefined:#?}");
+    }
+
+    println!("    success: no undefined references to core found");
+}
+
+/// For a given archive path, do something with each symbol.
+fn for_each_symbol(path: &Path, mut f: impl FnMut(Symbol, &ArchiveMember)) {
+    let data = fs::read(path).expect("reading file failed");
+    let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed");
+    for member in archive.members() {
+        let member = member.expect("failed to access member");
+        let obj_data = member.data(&*data).expect("failed to access object");
+        let obj = object::File::parse(obj_data).expect("failed to parse object");
+        obj.symbols().for_each(|sym| f(sym, &member));
+    }
+}