about summary refs log tree commit diff
path: root/library/stdarch/crates/stdarch-test/src/disassembly.rs
diff options
context:
space:
mode:
Diffstat (limited to 'library/stdarch/crates/stdarch-test/src/disassembly.rs')
-rw-r--r--library/stdarch/crates/stdarch-test/src/disassembly.rs208
1 files changed, 208 insertions, 0 deletions
diff --git a/library/stdarch/crates/stdarch-test/src/disassembly.rs b/library/stdarch/crates/stdarch-test/src/disassembly.rs
new file mode 100644
index 00000000000..f5167ea8d8e
--- /dev/null
+++ b/library/stdarch/crates/stdarch-test/src/disassembly.rs
@@ -0,0 +1,208 @@
+//! Disassembly calling function for most targets.
+
+use crate::Function;
+use std::{collections::HashSet, env, str};
+
+// Extracts the "shim" name from the `symbol`.
+fn normalize(mut symbol: &str) -> String {
+    // Remove trailing colon:
+    if symbol.ends_with(':') {
+        symbol = &symbol[..symbol.len() - 1];
+    }
+    if symbol.ends_with('>') {
+        symbol = &symbol[..symbol.len() - 1];
+    }
+    if let Some(idx) = symbol.find('<') {
+        symbol = &symbol[idx + 1..];
+    }
+
+    let mut symbol = rustc_demangle::demangle(symbol).to_string();
+    symbol = match symbol.rfind("::h") {
+        Some(i) => symbol[..i].to_string(),
+        None => symbol.to_string(),
+    };
+
+    // Remove Rust paths
+    if let Some(last_colon) = symbol.rfind(':') {
+        symbol = symbol[last_colon + 1..].to_string();
+    }
+
+    // Normalize to no leading underscore to handle platforms that may
+    // inject extra ones in symbol names.
+    while symbol.starts_with('_') || symbol.starts_with('.') {
+        symbol.remove(0);
+    }
+    // Windows/x86 has a suffix such as @@4.
+    if let Some(idx) = symbol.find("@@") {
+        symbol = symbol[..idx].to_string();
+    }
+    symbol
+}
+
+#[cfg(target_env = "msvc")]
+pub(crate) fn disassemble_myself() -> HashSet<Function> {
+    let me = env::current_exe().expect("failed to get current exe");
+
+    let target = if cfg!(target_arch = "x86_64") {
+        "x86_64-pc-windows-msvc"
+    } else if cfg!(target_arch = "x86") {
+        "i686-pc-windows-msvc"
+    } else if cfg!(target_arch = "aarch64") {
+        "aarch64-pc-windows-msvc"
+    } else {
+        panic!("disassembly unimplemented")
+    };
+    let mut cmd =
+        cc::windows_registry::find(target, "dumpbin.exe").expect("failed to find `dumpbin` tool");
+    let output = cmd
+        .arg("/DISASM:NOBYTES")
+        .arg(&me)
+        .output()
+        .expect("failed to execute dumpbin");
+    println!(
+        "{}\n{}",
+        output.status,
+        String::from_utf8_lossy(&output.stderr)
+    );
+    assert!(output.status.success());
+    // Windows does not return valid UTF-8 output:
+    parse(&String::from_utf8_lossy(Vec::leak(output.stdout)))
+}
+
+#[cfg(not(target_env = "msvc"))]
+pub(crate) fn disassemble_myself() -> HashSet<Function> {
+    let me = env::current_exe().expect("failed to get current exe");
+
+    let objdump = env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string());
+    let add_args = if cfg!(target_vendor = "apple") && cfg!(target_arch = "aarch64") {
+        // Target features need to be enabled for LLVM objdump on Darwin ARM64
+        vec!["--mattr=+v8.6a,+crypto,+tme"]
+    } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
+        vec!["--mattr=+zk,+zks,+zbc,+zbb"]
+    } else {
+        vec![]
+    };
+    let output = std::process::Command::new(objdump.clone())
+        .arg("--disassemble")
+        .arg("--no-show-raw-insn")
+        .args(add_args)
+        .arg(&me)
+        .output()
+        .unwrap_or_else(|_| panic!("failed to execute objdump. OBJDUMP={objdump}"));
+    println!(
+        "{}\n{}",
+        output.status,
+        String::from_utf8_lossy(&output.stderr)
+    );
+    assert!(output.status.success());
+
+    let disassembly = String::from_utf8_lossy(Vec::leak(output.stdout));
+
+    parse(&disassembly)
+}
+
+fn parse(output: &str) -> HashSet<Function> {
+    let mut lines = output.lines();
+
+    println!(
+        "First 100 lines of the disassembly input containing {} lines:",
+        lines.clone().count()
+    );
+    for line in output.lines().take(100) {
+        println!("{line}");
+    }
+
+    let mut functions = HashSet::new();
+    let mut cached_header = None;
+    while let Some(header) = cached_header.take().or_else(|| lines.next()) {
+        if !header.ends_with(':') || !header.contains("stdarch_test_shim") {
+            continue;
+        }
+        eprintln!("header: {header}");
+        let symbol = normalize(header);
+        eprintln!("normalized symbol: {symbol}");
+        let mut instructions = Vec::new();
+        for instruction in lines.by_ref() {
+            if instruction.ends_with(':') {
+                cached_header = Some(instruction);
+                break;
+            }
+            if instruction.is_empty() {
+                cached_header = None;
+                break;
+            }
+            let mut parts = if cfg!(target_env = "msvc") {
+                // Each line looks like:
+                //
+                // >  $addr: $instr..
+                instruction
+                    .split(&[' ', ','])
+                    .filter(|&x| !x.is_empty())
+                    .skip(1)
+                    .map(str::to_lowercase)
+                    .skip_while(|s| matches!(&**s, "lock" | "vex")) // skip x86-specific prefix
+                    .collect::<Vec<String>>()
+            } else {
+                // objdump with --no-show-raw-insn
+                // Each line of instructions should look like:
+                //
+                //      $rel_offset:       $instruction...
+                instruction
+                    .split_whitespace()
+                    .skip(1)
+                    .skip_while(|s| matches!(*s, "lock" | "{evex}" | "{vex}")) // skip x86-specific prefix
+                    .map(ToString::to_string)
+                    .collect::<Vec<String>>()
+            };
+
+            if cfg!(any(target_arch = "aarch64", target_arch = "arm64ec")) {
+                // Normalize [us]shll.* ..., #0 instructions to the preferred form: [us]xtl.* ...
+                // as neither LLVM objdump nor dumpbin does that.
+                // See https://developer.arm.com/documentation/ddi0602/latest/SIMD-FP-Instructions/UXTL--UXTL2--Unsigned-extend-Long--an-alias-of-USHLL--USHLL2-
+                // and https://developer.arm.com/documentation/ddi0602/latest/SIMD-FP-Instructions/SXTL--SXTL2--Signed-extend-Long--an-alias-of-SSHLL--SSHLL2-
+                // for details.
+                fn is_shll(instr: &str) -> bool {
+                    if cfg!(target_env = "msvc") {
+                        instr.starts_with("ushll") || instr.starts_with("sshll")
+                    } else {
+                        instr.starts_with("ushll.") || instr.starts_with("sshll.")
+                    }
+                }
+                match (parts.first(), parts.last()) {
+                    (Some(instr), Some(last_arg)) if is_shll(instr) && last_arg == "#0" => {
+                        assert_eq!(parts.len(), 4);
+                        let mut new_parts = Vec::with_capacity(3);
+                        let new_instr = format!("{}{}{}", &instr[..1], "xtl", &instr[5..]);
+                        new_parts.push(new_instr);
+                        new_parts.push(parts[1].clone());
+                        new_parts.push(parts[2][0..parts[2].len() - 1].to_owned()); // strip trailing comma
+                        parts = new_parts;
+                    }
+                    // dumpbin uses "ins" instead of "mov"
+                    (Some(instr), _) if cfg!(target_env = "msvc") && instr == "ins" => {
+                        parts[0] = "mov".to_string()
+                    }
+                    _ => {}
+                };
+            }
+
+            instructions.push(parts.join(" "));
+            if matches!(&**instructions.last().unwrap(), "ret" | "retq") {
+                cached_header = None;
+                break;
+            }
+        }
+        let function = Function {
+            name: symbol,
+            instrs: instructions,
+        };
+        assert!(functions.insert(function));
+    }
+
+    eprintln!("all found functions dump:");
+    for k in &functions {
+        eprintln!("  f: {}", k.name);
+    }
+
+    functions
+}