about summary refs log tree commit diff
path: root/library/compiler-builtins
diff options
context:
space:
mode:
Diffstat (limited to 'library/compiler-builtins')
-rw-r--r--library/compiler-builtins/Cargo.toml1
-rwxr-xr-xlibrary/compiler-builtins/ci/run.sh125
-rw-r--r--library/compiler-builtins/crates/symbol-check/Cargo.toml13
-rw-r--r--library/compiler-builtins/crates/symbol-check/src/main.rs231
4 files changed, 262 insertions, 108 deletions
diff --git a/library/compiler-builtins/Cargo.toml b/library/compiler-builtins/Cargo.toml
index b39ec8a25da..bc6b4bd2979 100644
--- a/library/compiler-builtins/Cargo.toml
+++ b/library/compiler-builtins/Cargo.toml
@@ -6,6 +6,7 @@ members = [
     "crates/libm-macros",
     "crates/musl-math-sys",
     "crates/panic-handler",
+    "crates/symbol-check",
     "crates/util",
     "libm",
     "libm-test",
diff --git a/library/compiler-builtins/ci/run.sh b/library/compiler-builtins/ci/run.sh
index 68d13c130bc..cf3f7dfda1d 100755
--- a/library/compiler-builtins/ci/run.sh
+++ b/library/compiler-builtins/ci/run.sh
@@ -47,87 +47,25 @@ else
     fi
 fi
 
-
-declare -a rlib_paths
-
-# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs
-update_rlib_paths() {
-    if [ -d /builtins-target ]; then
-        rlib_paths=( /builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
-    else
-        rlib_paths=( target/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
-    fi
-}
-
-# Remove any existing artifacts from previous tests that don't set #![compiler_builtins]
-update_rlib_paths
-rm -f "${rlib_paths[@]}"
-
-cargo build -p compiler_builtins --target "$target"
-cargo build -p compiler_builtins --target "$target" --release
-cargo build -p compiler_builtins --target "$target" --features c
-cargo build -p compiler_builtins --target "$target" --features c --release
-cargo build -p compiler_builtins --target "$target" --features no-asm
-cargo build -p compiler_builtins --target "$target" --features no-asm --release
-cargo build -p compiler_builtins --target "$target" --features no-f16-f128
-cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release
-
-PREFIX=${target//unknown-/}-
-case "$target" in
-    armv7-*)
-        PREFIX=arm-linux-gnueabihf-
-        ;;
-    thumb*)
-        PREFIX=arm-none-eabi-
-        ;;
-    *86*-*)
-        PREFIX=
-        ;;
-esac
-
-NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) )
-if [ "$NM" = "" ]; then
-  NM="${PREFIX}nm"
-fi
-
-# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with
-# rustup run to ensure that those are in PATH.
-TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')"
-if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then
-  NM="rustup run $TOOLCHAIN $NM"
-fi
-
-# Look out for duplicated symbols when we include the compiler-rt (C) implementation
-update_rlib_paths
-for rlib in "${rlib_paths[@]}"; do
-    set +x
-    echo "================================================================"
-    echo "checking $rlib for duplicate symbols"
-    echo "================================================================"
-    set -x
-    
-    duplicates_found=0
-
-    # NOTE On i586, It's normal that the get_pc_thunk symbol appears several
-    # times so ignore it
-    $NM -g --defined-only "$rlib" 2>&1 |
-      sort |
-      uniq -d |
-      grep -v __x86.get_pc_thunk --quiet |
-      grep 'T __' && duplicates_found=1
-
-    if [ "$duplicates_found" != 0 ]; then
-        echo "error: found duplicate symbols"
-        exit 1
-    else
-        echo "success; no duplicate symbols found"
-    fi
-done
-
-rm -f "${rlib_paths[@]}"
+# Ensure there are no duplicate symbols or references to `core` when
+# `compiler-builtins` is built with various features. Symcheck invokes Cargo to
+# build with the arguments we provide it, then validates the built artifacts.
+symcheck=(cargo run -p symbol-check --release)
+[[ "$target" = "wasm"* ]] && symcheck+=(--features wasm)
+symcheck+=(-- build-and-check)
+
+"${symcheck[@]}" -p compiler_builtins --target "$target"
+"${symcheck[@]}" -p compiler_builtins --target "$target" --release
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features c
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features c --release
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm --release
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128
+"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release
 
 build_intrinsics_test() {
-    cargo build \
+    # symcheck also checks the results of builtins-test-intrinsics
+    "${symcheck[@]}" \
         --target "$target" --verbose \
         --manifest-path builtins-test-intrinsics/Cargo.toml "$@"
 }
@@ -143,35 +81,6 @@ build_intrinsics_test --features c --release
 CARGO_PROFILE_DEV_LTO=true build_intrinsics_test
 CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release
 
-# Ensure no references to any symbols from core
-update_rlib_paths
-for rlib in "${rlib_paths[@]}"; do
-    set +x
-    echo "================================================================"
-    echo "checking $rlib for references to core"
-    echo "================================================================"
-    set -x
-
-    tmpdir="${CARGO_TARGET_DIR:-target}/tmp"
-    test -d "$tmpdir" || mkdir "$tmpdir"
-    defined="$tmpdir/defined_symbols.txt"
-    undefined="$tmpdir/defined_symbols.txt"
-
-    $NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined"
-    $NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined"
-    grep_has_results=0
-    grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1
-
-    if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then
-        echo "FIXME: powerpc64 fails these tests"
-    elif [ "$grep_has_results" != 0 ]; then
-        echo "error: found unexpected references to core"
-        exit 1
-    else
-        echo "success; no references to core found"
-    fi
-done
-
 # Test libm
 
 # Make sure a simple build works
diff --git a/library/compiler-builtins/crates/symbol-check/Cargo.toml b/library/compiler-builtins/crates/symbol-check/Cargo.toml
new file mode 100644
index 00000000000..30969ee406a
--- /dev/null
+++ b/library/compiler-builtins/crates/symbol-check/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "symbol-check"
+version = "0.1.0"
+edition = "2024"
+publish = false
+
+[dependencies]
+# FIXME: used as a git dependency since the latest release does not support wasm
+object = { git = "https://github.com/gimli-rs/object.git", rev = "013fac75da56a684377af4151b8164b78c1790e0" }
+serde_json = "1.0.140"
+
+[features]
+wasm = ["object/wasm"]
diff --git a/library/compiler-builtins/crates/symbol-check/src/main.rs b/library/compiler-builtins/crates/symbol-check/src/main.rs
new file mode 100644
index 00000000000..104505438cc
--- /dev/null
+++ b/library/compiler-builtins/crates/symbol-check/src/main.rs
@@ -0,0 +1,231 @@
+//! Tool used by CI to inspect compiler-builtins archives and help ensure we won't run into any
+//! linking errors.
+
+use std::collections::{BTreeMap, BTreeSet};
+use std::fs;
+use std::io::{BufRead, BufReader};
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+
+use object::read::archive::{ArchiveFile, ArchiveMember};
+use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection};
+use serde_json::Value;
+
+const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"];
+const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe"), None];
+
+const USAGE: &str = "Usage:
+
+    symbol-check build-and-check CARGO_ARGS ...
+
+Cargo will get invoked with `CARGO_ARGS` and all output
+`compiler_builtins*.rlib` files will be checked.
+";
+
+fn main() {
+    // Create a `&str` vec so we can match on it.
+    let args = std::env::args().collect::<Vec<_>>();
+    let args_ref = args.iter().map(String::as_str).collect::<Vec<_>>();
+
+    match &args_ref[1..] {
+        ["build-and-check", rest @ ..] if !rest.is_empty() => {
+            let paths = exec_cargo_with_args(rest);
+            for path in paths {
+                println!("Checking {}", path.display());
+                verify_no_duplicates(&path);
+                verify_core_symbols(&path);
+            }
+        }
+        _ => {
+            println!("{USAGE}");
+            std::process::exit(1);
+        }
+    }
+}
+
+/// Run `cargo build` with the provided additional arguments, collecting the list of created
+/// libraries.
+fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
+    let mut cmd = Command::new("cargo")
+        .arg("build")
+        .arg("--message-format=json")
+        .args(args)
+        .stdout(Stdio::piped())
+        .spawn()
+        .expect("failed to launch Cargo");
+
+    let stdout = cmd.stdout.take().unwrap();
+    let reader = BufReader::new(stdout);
+    let mut check_files = Vec::new();
+
+    for line in reader.lines() {
+        let line = line.expect("failed to read line");
+        println!("{line}"); // tee to stdout
+
+        // Select only steps that create files
+        let j: Value = serde_json::from_str(&line).expect("failed to deserialize");
+        if j["reason"] != "compiler-artifact" {
+            continue;
+        }
+
+        // Find rlibs in the created file list that match our expected library names and
+        // extensions.
+        for fpath in j["filenames"].as_array().expect("filenames not an array") {
+            let path = fpath.as_str().expect("file name not a string");
+            let path = PathBuf::from(path);
+
+            if CHECK_EXTENSIONS.contains(&path.extension().map(|ex| ex.to_str().unwrap())) {
+                let fname = path.file_name().unwrap().to_str().unwrap();
+
+                if CHECK_LIBRARIES.iter().any(|lib| fname.contains(lib)) {
+                    check_files.push(path);
+                }
+            }
+        }
+    }
+
+    cmd.wait().expect("failed to wait on Cargo");
+
+    assert!(!check_files.is_empty(), "no compiler_builtins rlibs found");
+    println!("Collected the following rlibs to check: {check_files:#?}");
+
+    check_files
+}
+
+/// Information collected from `object`, for convenience.
+#[expect(unused)] // only for printing
+#[derive(Clone, Debug)]
+struct SymInfo {
+    name: String,
+    kind: SymbolKind,
+    scope: SymbolScope,
+    section: SymbolSection,
+    is_undefined: bool,
+    is_global: bool,
+    is_local: bool,
+    is_weak: bool,
+    is_common: bool,
+    address: u64,
+    object: String,
+}
+
+impl SymInfo {
+    fn new(sym: &Symbol, member: &ArchiveMember) -> Self {
+        Self {
+            name: sym.name().expect("missing name").to_owned(),
+            kind: sym.kind(),
+            scope: sym.scope(),
+            section: sym.section(),
+            is_undefined: sym.is_undefined(),
+            is_global: sym.is_global(),
+            is_local: sym.is_local(),
+            is_weak: sym.is_weak(),
+            is_common: sym.is_common(),
+            address: sym.address(),
+            object: String::from_utf8_lossy(member.name()).into_owned(),
+        }
+    }
+}
+
+/// Ensure that the same global symbol isn't defined in multiple object files within an archive.
+///
+/// Note that this will also locate cases where a symbol is weakly defined in more than one place.
+/// Technically there are no linker errors that will come from this, but it keeps our binary more
+/// straightforward and saves some distribution size.
+fn verify_no_duplicates(path: &Path) {
+    let mut syms = BTreeMap::<String, SymInfo>::new();
+    let mut dups = Vec::new();
+    let mut found_any = false;
+
+    for_each_symbol(path, |symbol, member| {
+        // Only check defined globals
+        if !symbol.is_global() || symbol.is_undefined() {
+            return;
+        }
+
+        let sym = SymInfo::new(&symbol, member);
+
+        // x86-32 includes multiple copies of thunk symbols
+        if sym.name.starts_with("__x86.get_pc_thunk") {
+            return;
+        }
+
+        // Windows has symbols for literal numeric constants, string literals, and MinGW pseudo-
+        // relocations. These are allowed to have repeated definitions.
+        let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"];
+        if win_allowed_dup_pfx
+            .iter()
+            .any(|pfx| sym.name.starts_with(pfx))
+        {
+            return;
+        }
+
+        match syms.get(&sym.name) {
+            Some(existing) => {
+                dups.push(sym);
+                dups.push(existing.clone());
+            }
+            None => {
+                syms.insert(sym.name.clone(), sym);
+            }
+        }
+
+        found_any = true;
+    });
+
+    assert!(found_any, "no symbols found");
+
+    if !dups.is_empty() {
+        dups.sort_unstable_by(|a, b| a.name.cmp(&b.name));
+        panic!("found duplicate symbols: {dups:#?}");
+    }
+
+    println!("    success: no duplicate symbols found");
+}
+
+/// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined.
+fn verify_core_symbols(path: &Path) {
+    let mut defined = BTreeSet::new();
+    let mut undefined = Vec::new();
+    let mut has_symbols = false;
+
+    for_each_symbol(path, |symbol, member| {
+        has_symbols = true;
+
+        // Find only symbols from `core`
+        if !symbol.name().unwrap().contains("_ZN4core") {
+            return;
+        }
+
+        let sym = SymInfo::new(&symbol, member);
+        if sym.is_undefined {
+            undefined.push(sym);
+        } else {
+            defined.insert(sym.name);
+        }
+    });
+
+    assert!(has_symbols, "no symbols found");
+
+    // Discard any symbols that are defined somewhere in the archive
+    undefined.retain(|sym| !defined.contains(&sym.name));
+
+    if !undefined.is_empty() {
+        undefined.sort_unstable_by(|a, b| a.name.cmp(&b.name));
+        panic!("found undefined symbols from core: {undefined:#?}");
+    }
+
+    println!("    success: no undefined references to core found");
+}
+
+/// For a given archive path, do something with each symbol.
+fn for_each_symbol(path: &Path, mut f: impl FnMut(Symbol, &ArchiveMember)) {
+    let data = fs::read(path).expect("reading file failed");
+    let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed");
+    for member in archive.members() {
+        let member = member.expect("failed to access member");
+        let obj_data = member.data(&*data).expect("failed to access object");
+        let obj = object::File::parse(obj_data).expect("failed to parse object");
+        obj.symbols().for_each(|sym| f(sym, &member));
+    }
+}