diff options
Diffstat (limited to 'library/stdarch/crates/stdarch-test/src/lib.rs')
| -rw-r--r-- | library/stdarch/crates/stdarch-test/src/lib.rs | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/library/stdarch/crates/stdarch-test/src/lib.rs b/library/stdarch/crates/stdarch-test/src/lib.rs new file mode 100644 index 00000000000..f6614f6d51c --- /dev/null +++ b/library/stdarch/crates/stdarch-test/src/lib.rs @@ -0,0 +1,218 @@ +//! Runtime support needed for testing the stdarch crate. +//! +//! This basically just disassembles the current executable and then parses the +//! output once globally and then provides the `assert` function which makes +//! assertions about the disassembly of a function. +#![deny(rust_2018_idioms)] +#![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)] + +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate cfg_if; + +pub use assert_instr_macro::*; +pub use simd_test_macro::*; +use std::{cmp, collections::HashSet, env, hash, hint::black_box, str}; + +cfg_if! { + if #[cfg(target_arch = "wasm32")] { + pub mod wasm; + use wasm::disassemble_myself; + } else { + mod disassembly; + use crate::disassembly::disassemble_myself; + } +} + +lazy_static! { + static ref DISASSEMBLY: HashSet<Function> = disassemble_myself(); +} + +#[derive(Debug)] +struct Function { + name: String, + instrs: Vec<String>, +} +impl Function { + fn new(n: &str) -> Self { + Self { + name: n.to_string(), + instrs: Vec::new(), + } + } +} + +impl cmp::PartialEq for Function { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} +impl cmp::Eq for Function {} + +impl hash::Hash for Function { + fn hash<H: hash::Hasher>(&self, state: &mut H) { + self.name.hash(state) + } +} + +/// Main entry point for this crate, called by the `#[assert_instr]` macro. +/// +/// This asserts that the function at `fnptr` contains the instruction +/// `expected` provided. +pub fn assert(shim_addr: usize, fnname: &str, expected: &str) { + // Make sure that the shim is not removed + black_box(shim_addr); + + //eprintln!("shim name: {fnname}"); + let function = &DISASSEMBLY + .get(&Function::new(fnname)) + .unwrap_or_else(|| panic!("function \"{fnname}\" not found in the disassembly")); + //eprintln!(" function: {:?}", function); + + let mut instrs = &function.instrs[..]; + while instrs.last().is_some_and(|s| s == "nop" || s == "int3") { + instrs = &instrs[..instrs.len() - 1]; + } + + // Look for `expected` as the first part of any instruction in this + // function, e.g., tzcntl in tzcntl %rax,%rax. + // + // There are two cases when the expected instruction is nop: + // 1. The expected intrinsic is compiled away so we can't + // check for it - aka the intrinsic is not generating any code. + // 2. It is a mark, indicating that the instruction will be + // compiled into other instructions - mainly because of llvm + // optimization. + let expected = if expected == "unknown" { + "<unknown>" // Workaround for rust-lang/stdarch#1674, todo: remove when the issue is fixed + } else { + expected + }; + let found = expected == "nop" || instrs.iter().any(|s| s.starts_with(expected)); + + // Look for subroutine call instructions in the disassembly to detect whether + // inlining failed: all intrinsics are `#[inline(always)]`, so calling one + // intrinsic from another should not generate subroutine call instructions. + let inlining_failed = if cfg!(target_arch = "x86_64") || cfg!(target_arch = "wasm32") { + instrs.iter().any(|s| s.starts_with("call ")) + } else if cfg!(target_arch = "x86") { + instrs.windows(2).any(|s| { + // On 32-bit x86 position independent code will call itself and be + // immediately followed by a `pop` to learn about the current address. + // Let's not take that into account when considering whether a function + // failed inlining something. + s[0].starts_with("call ") && s[1].starts_with("pop") // FIXME: original logic but does not match comment + }) + } else if cfg!(any( + target_arch = "aarch64", + target_arch = "arm64ec", + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + instrs.iter().any(|s| s.starts_with("bl ")) + } else { + // FIXME: Add detection for other archs + false + }; + + let instruction_limit = std::env::var("STDARCH_ASSERT_INSTR_LIMIT") + .ok() + .map_or_else( + || match expected { + // `cpuid` returns a pretty big aggregate structure, so exempt + // it from the slightly more restrictive 22 instructions below. + "cpuid" => 30, + + // These require 8 loads and stores, so it _just_ overflows the limit + "aesencwide128kl" | "aesencwide256kl" | "aesdecwide128kl" | "aesdecwide256kl" => 24, + + // Apparently, on Windows, LLVM generates a bunch of + // saves/restores of xmm registers around these instructions, + // which exceeds the limit of 20 below. As it seems dictated by + // Windows's ABI (I believe?), we probably can't do much + // about it. + "vzeroall" | "vzeroupper" if cfg!(windows) => 30, + + // Intrinsics using `cvtpi2ps` are typically "composites" and + // in some cases exceed the limit. + "cvtpi2ps" => 25, + // core_arch/src/arm_shared/simd32 + // vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit) + "usad8" | "vfma" | "vfms" => 27, + "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, + // core_arch/src/arm_shared/simd32 + // vst1q_s64_x4_vst1 : #instructions = 27 >= 22 (limit) + "vld3" => 28, + // core_arch/src/arm_shared/simd32 + // vld4q_lane_u32_vld4 : #instructions = 36 >= 22 (limit) + "vld4" => 37, + // core_arch/src/arm_shared/simd32 + // vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit) + "vst1" => 41, + // core_arch/src/arm_shared/simd32 + // vst3q_u32_vst3 : #instructions = 25 >= 22 (limit) + "vst3" => 26, + // core_arch/src/arm_shared/simd32 + // vst4q_u32_vst4 : #instructions = 33 >= 22 (limit) + "vst4" => 34, + + // core_arch/src/arm_shared/simd32 + // vst1q_p64_x4_nop : #instructions = 33 >= 22 (limit) + "nop" if fnname.contains("vst1q_p64") => 34, + + // Original limit was 20 instructions, but ARM DSP Intrinsics + // are exactly 20 instructions long. So, bump the limit to 22 + // instead of adding here a long list of exceptions. + _ => { + // aarch64_be may add reverse instructions which increases + // the number of instructions generated. + if cfg!(all(target_endian = "big", target_arch = "aarch64")) { + 32 + } else { + 22 + } + } + }, + |v| v.parse().unwrap(), + ); + let probably_only_one_instruction = instrs.len() < instruction_limit; + + if found && probably_only_one_instruction && !inlining_failed { + return; + } + + // Help debug by printing out the found disassembly, and then panic as we + // didn't find the instruction. + println!("disassembly for {fnname}: ",); + for (i, instr) in instrs.iter().enumerate() { + println!("\t{i:2}: {instr}"); + } + + if !found { + panic!("failed to find instruction `{expected}` in the disassembly"); + } else if !probably_only_one_instruction { + panic!( + "instruction found, but the disassembly contains too many \ + instructions: #instructions = {} >= {} (limit)", + instrs.len(), + instruction_limit + ); + } else if inlining_failed { + panic!( + "instruction found, but the disassembly contains subroutine \ + call instructions, which hint that inlining failed" + ); + } +} + +pub fn assert_skip_test_ok(name: &str, missing_features: &[&str]) { + println!("Skipping test `{name}` due to missing target features:"); + for feature in missing_features { + println!(" - {feature}"); + } + match env::var("STDARCH_TEST_EVERYTHING") { + Ok(_) => panic!("skipped test `{name}` when it shouldn't be skipped"), + Err(_) => println!("Set STDARCH_TEST_EVERYTHING to make this an error."), + } +} |
