| author | Trevor Gross <tmgross@umich.edu> | 2025-03-18 09:52:53 +0000 |
|---|---|---|
| committer | Trevor Gross <t.gross35@gmail.com> | 2025-04-18 21:14:41 -0400 |
| commit | 92b1e8454d7c4ea424ff970e6be283144584f812 | |
| tree | 3064bf6a40146c3104364b87eb418e7c7d1c150a | library/compiler-builtins/builtins-test |
| parent | 3ea9f849d54b6fee2bdacea04cad4d15d68b3fb5 | |
Rename `testcrate` to `builtins-test`
The repo will soon have `libm` as a top-level crate, so make it clear that this is only the test crate for `compiler-builtins`.
Diffstat (limited to 'library/compiler-builtins/builtins-test')
29 files changed, 5601 insertions, 0 deletions
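Most of the benchmark files added below are driven by the `float_bench!` macro from `builtins-test/src/bench.rs` (the tail end of which appears at the bottom of this diff). For orientation, here is a minimal hand-written sketch of roughly what one invocation expands to — not the macro's actual output; the extern binding, sample inputs, and group names are illustrative assumptions, and the real macro also emits the inline-asm variant and honors `sys_available`/`output_eq`:

    // Editorial sketch, not part of the commit. Uses edition-2024 syntax
    // (`unsafe extern`), matching the crate's edition in Cargo.toml.
    use std::hint::black_box;

    use criterion::Criterion;

    fn add_f32(criterion: &mut Criterion) {
        // The system symbol from the platform's builtins library
        // (hypothetical hand-written binding for illustration).
        unsafe extern "C" {
            fn __addsf3(a: f32, b: f32) -> f32;
        }

        // First, cross-check the crate implementation against the system
        // symbol so a wrong result fails loudly before any measurement.
        for (a, b) in [(1.0f32, 2.0f32), (-0.5, 3.25)] {
            let crate_res = compiler_builtins::float::add::__addsf3(a, b);
            let sys_res = unsafe { __addsf3(a, b) };
            assert_eq!(crate_res.to_bits(), sys_res.to_bits());
        }

        // Then register one Criterion benchmark per implementation.
        let mut group = criterion.benchmark_group("add_f32");
        group.bench_function("crate", |b| {
            b.iter(|| compiler_builtins::float::add::__addsf3(black_box(1.0), black_box(2.0)))
        });
        group.bench_function("sys", |b| {
            b.iter(|| unsafe { __addsf3(black_box(1.0), black_box(2.0)) })
        });
        group.finish();
    }

(The real macro additionally handles the `crate_fn_ppc`/`sys_fn_ppc` symbol aliases seen in the `f128` invocations, and skips the `sys` half entirely when `sys_available` rules it out.)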
| diff --git a/library/compiler-builtins/builtins-test/Cargo.toml b/library/compiler-builtins/builtins-test/Cargo.toml new file mode 100644 index 00000000000..526e9b18af0 --- /dev/null +++ b/library/compiler-builtins/builtins-test/Cargo.toml @@ -0,0 +1,102 @@ +[package] +name = "builtins-test" +version = "0.1.0" +authors = ["Alex Crichton <alex@alexcrichton.com>"] +edition = "2024" +publish = false + +[lib] +test = false +doctest = false + +[dependencies] +# For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential +# problems with system RNGs on the variety of platforms this crate is tested on. +# `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts. +rand_xoshiro = "0.6" +# To compare float builtins against +rustc_apfloat = "0.2.1" +# Really a dev dependency, but dev dependencies can't be optional +iai-callgrind = { version = "0.14.0", optional = true } + +[dependencies.compiler_builtins] +path = "../compiler-builtins" +default-features = false +features = ["public-test-deps"] + +[dev-dependencies] +criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } +paste = "1.0.15" + +[target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies] +test = { git = "https://github.com/japaric/utest" } +utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" } +utest-macros = { git = "https://github.com/japaric/utest" } + +[features] +default = ["mangled-names"] +c = ["compiler_builtins/c"] +no-asm = ["compiler_builtins/no-asm"] +no-f16-f128 = ["compiler_builtins/no-f16-f128"] +mem = ["compiler_builtins/mem"] +mangled-names = ["compiler_builtins/mangled-names"] +# Skip tests that rely on f128 symbols being available on the system +no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"] +# Some platforms have some f128 functions but everything except integer conversions +no-sys-f128-int-convert = [] +no-sys-f16-f128-convert = [] +no-sys-f16-f64-convert = [] +# Skip tests that rely on f16 symbols being available on the system +no-sys-f16 = ["no-sys-f16-f64-convert"] + +# Enable icount benchmarks (requires iai-callgrind and valgrind) +icount = ["dep:iai-callgrind"] + +# Enable report generation without bringing in more dependencies by default +benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] + +# NOTE: benchmarks must be run with `--no-default-features` or with +# `-p builtins-test`, otherwise the default `compiler-builtins` feature +# of the `compiler_builtins` crate gets activated, resulting in linker +# errors. 
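+#
+# (Editor's illustration, not part of the commit — given that caveat, typical
+# invocations look like `cargo bench -p builtins-test --bench float_add` or
+# `cargo bench --no-default-features --bench float_add`, either of which
+# avoids activating that default feature.)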
+ +[[bench]] +name = "float_add" +harness = false + +[[bench]] +name = "float_sub" +harness = false + +[[bench]] +name = "float_mul" +harness = false + +[[bench]] +name = "float_div" +harness = false + +[[bench]] +name = "float_cmp" +harness = false + +[[bench]] +name = "float_conv" +harness = false + +[[bench]] +name = "float_extend" +harness = false + +[[bench]] +name = "float_trunc" +harness = false + +[[bench]] +name = "float_pow" +harness = false + +[[bench]] +name = "mem_icount" +harness = false +required-features = ["icount"] diff --git a/library/compiler-builtins/builtins-test/benches/float_add.rs b/library/compiler-builtins/builtins-test/benches/float_add.rs new file mode 100644 index 00000000000..197f90b319d --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_add.rs @@ -0,0 +1,93 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::add; +use criterion::{Criterion, criterion_main}; + +float_bench! { + name: add_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: add::__addsf3, + sys_fn: __addsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "addss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fadd {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: add_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: add::__adddf3, + sys_fn: __adddf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "addsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fadd {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: add_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: add::__addtf3, + crate_fn_ppc: add::__addkf3, + sys_fn: __addtf3, + sys_fn_ppc: __addkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_add() { + let mut criterion = Criterion::default().configure_from_args(); + + add_f32(&mut criterion); + add_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + add_f128(&mut criterion); + } +} + +criterion_main!(float_add); diff --git a/library/compiler-builtins/builtins-test/benches/float_cmp.rs b/library/compiler-builtins/builtins-test/benches/float_cmp.rs new file mode 100644 index 00000000000..4493765ec1b --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_cmp.rs @@ -0,0 +1,208 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use criterion::{Criterion, criterion_main}; + +use compiler_builtins::float::cmp; + +/// `gt` symbols are allowed to return differing results, they just get compared +/// to 0. +fn gt_res_eq(a: i32, b: i32) -> bool { + let a_lt_0 = a <= 0; + let b_lt_0 = b <= 0; + (a_lt_0 && b_lt_0) || (!a_lt_0 && !b_lt_0) +} + +float_bench! 
{ + name: cmp_f32_gt, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__gtsf2, + sys_fn: __gtsf2, + sys_available: all(), + output_eq: gt_res_eq, + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomiss {a}, {b}", + "seta {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:s}, {b:s}", + "cset {ret:w}, gt", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem,nostack), + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f32_unord, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__unordsf2, + sys_fn: __unordsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomiss {a}, {b}", + "setp {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:s}, {b:s}", + "cset {ret:w}, vs", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f64_gt, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__gtdf2, + sys_fn: __gtdf2, + sys_available: all(), + output_eq: gt_res_eq, + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomisd {a}, {b}", + "seta {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:d}, {b:d}", + "cset {ret:w}, gt", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f64_unord, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__unorddf2, + sys_fn: __unorddf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomisd {a}, {b}", + "setp {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:d}, {b:d}", + "cset {ret:w}, vs", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f128_gt, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__gttf2, + crate_fn_ppc: cmp::__gtkf2, + sys_fn: __gttf2, + sys_fn_ppc: __gtkf2, + sys_available: not(feature = "no-sys-f128"), + output_eq: gt_res_eq, + asm: [] +} + +float_bench! 
{ + name: cmp_f128_unord, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__unordtf2, + crate_fn_ppc: cmp::__unordkf2, + sys_fn: __unordtf2, + sys_fn_ppc: __unordkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_cmp() { + let mut criterion = Criterion::default().configure_from_args(); + + cmp_f32_gt(&mut criterion); + cmp_f32_unord(&mut criterion); + cmp_f64_gt(&mut criterion); + cmp_f64_unord(&mut criterion); + + #[cfg(f128_enabled)] + { + cmp_f128_gt(&mut criterion); + cmp_f128_unord(&mut criterion); + } +} + +criterion_main!(float_cmp); diff --git a/library/compiler-builtins/builtins-test/benches/float_conv.rs b/library/compiler-builtins/builtins-test/benches/float_conv.rs new file mode 100644 index 00000000000..d4a7346d1d5 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_conv.rs @@ -0,0 +1,688 @@ +#![allow(improper_ctypes)] +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::conv; +use criterion::{Criterion, criterion_main}; + +/* unsigned int -> float */ + +float_bench! { + name: conv_u32_f32, + sig: (a: u32) -> f32, + crate_fn: conv::__floatunsisf, + sys_fn: __floatunsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "mov {tmp:e}, {a:e}", + "cvtsi2ss {ret}, {tmp}", + a = in(reg) a, + tmp = out(reg) _, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "ucvtf {ret:s}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u32_f64, + sig: (a: u32) -> f64, + crate_fn: conv::__floatunsidf, + sys_fn: __floatunsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "mov {tmp:e}, {a:e}", + "cvtsi2sd {ret}, {tmp}", + a = in(reg) a, + tmp = out(reg) _, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "ucvtf {ret:d}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_u32_f128, + sig: (a: u32) -> f128, + crate_fn: conv::__floatunsitf, + crate_fn_ppc: conv::__floatunsikf, + sys_fn: __floatunsitf, + sys_fn_ppc: __floatunsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +float_bench! { + name: conv_u64_f32, + sig: (a: u64) -> f32, + crate_fn: conv::__floatundisf, + sys_fn: __floatundisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "ucvtf {ret:s}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u64_f64, + sig: (a: u64) -> f64, + crate_fn: conv::__floatundidf, + sys_fn: __floatundidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "ucvtf {ret:d}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_u64_f128, + sig: (a: u64) -> f128, + crate_fn: conv::__floatunditf, + crate_fn_ppc: conv::__floatundikf, + sys_fn: __floatunditf, + sys_fn_ppc: __floatundikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +float_bench! 
{ + name: conv_u128_f32, + sig: (a: u128) -> f32, + crate_fn: conv::__floatuntisf, + sys_fn: __floatuntisf, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_u128_f64, + sig: (a: u128) -> f64, + crate_fn: conv::__floatuntidf, + sys_fn: __floatuntidf, + sys_available: all(), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_u128_f128, + sig: (a: u128) -> f128, + crate_fn: conv::__floatuntitf, + crate_fn_ppc: conv::__floatuntikf, + sys_fn: __floatuntitf, + sys_fn_ppc: __floatuntikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +/* signed int -> float */ + +float_bench! { + name: conv_i32_f32, + sig: (a: i32) -> f32, + crate_fn: conv::__floatsisf, + sys_fn: __floatsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsi2ss {ret}, {a:e}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "scvtf {ret:s}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i32_f64, + sig: (a: i32) -> f64, + crate_fn: conv::__floatsidf, + sys_fn: __floatsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "cvtsi2sd {ret}, {a:e}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "scvtf {ret:d}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_i32_f128, + sig: (a: i32) -> f128, + crate_fn: conv::__floatsitf, + crate_fn_ppc: conv::__floatsikf, + sys_fn: __floatsitf, + sys_fn_ppc: __floatsikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +float_bench! { + name: conv_i64_f32, + sig: (a: i64) -> f32, + crate_fn: conv::__floatdisf, + sys_fn: __floatdisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsi2ss {ret}, {a:r}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "scvtf {ret:s}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i64_f64, + sig: (a: i64) -> f64, + crate_fn: conv::__floatdidf, + sys_fn: __floatdidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "cvtsi2sd {ret}, {a:r}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "scvtf {ret:d}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_i64_f128, + sig: (a: i64) -> f128, + crate_fn: conv::__floatditf, + crate_fn_ppc: conv::__floatdikf, + sys_fn: __floatditf, + sys_fn_ppc: __floatdikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +float_bench! { + name: conv_i128_f32, + sig: (a: i128) -> f32, + crate_fn: conv::__floattisf, + sys_fn: __floattisf, + sys_available: all(), + asm: [] +} + +float_bench! 
{ + name: conv_i128_f64, + sig: (a: i128) -> f64, + crate_fn: conv::__floattidf, + sys_fn: __floattidf, + sys_available: all(), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_i128_f128, + sig: (a: i128) -> f128, + crate_fn: conv::__floattitf, + crate_fn_ppc: conv::__floattikf, + sys_fn: __floattitf, + sys_fn_ppc: __floattikf, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +/* float -> unsigned int */ + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u32, + sig: (a: f32) -> u32, + crate_fn: conv::__fixunssfsi, + sys_fn: __fixunssfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u32; + asm!( + "fcvtzu {ret:w}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u64, + sig: (a: f32) -> u64, + crate_fn: conv::__fixunssfdi, + sys_fn: __fixunssfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u64; + asm!( + "fcvtzu {ret:x}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u128, + sig: (a: f32) -> u128, + crate_fn: conv::__fixunssfti, + sys_fn: __fixunssfti, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_f64_u32, + sig: (a: f64) -> u32, + crate_fn: conv::__fixunsdfsi, + sys_fn: __fixunsdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u32; + asm!( + "fcvtzu {ret:w}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_u64, + sig: (a: f64) -> u64, + crate_fn: conv::__fixunsdfdi, + sys_fn: __fixunsdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u64; + asm!( + "fcvtzu {ret:x}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_u128, + sig: (a: f64) -> u128, + crate_fn: conv::__fixunsdfti, + sys_fn: __fixunsdfti, + sys_available: all(), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u32, + sig: (a: f128) -> u32, + crate_fn: conv::__fixunstfsi, + crate_fn_ppc: conv::__fixunskfsi, + sys_fn: __fixunstfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u64, + sig: (a: f128) -> u64, + crate_fn: conv::__fixunstfdi, + crate_fn_ppc: conv::__fixunskfdi, + sys_fn: __fixunstfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_u128, + sig: (a: f128) -> u128, + crate_fn: conv::__fixunstfti, + crate_fn_ppc: conv::__fixunskfti, + sys_fn: __fixunstfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +/* float -> signed int */ + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! 
{ + name: conv_f32_i32, + sig: (a: f32) -> i32, + crate_fn: conv::__fixsfsi, + sys_fn: __fixsfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcvtzs {ret:w}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i64, + sig: (a: f32) -> i64, + crate_fn: conv::__fixsfdi, + sys_fn: __fixsfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i64; + asm!( + "fcvtzs {ret:x}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i128, + sig: (a: f32) -> i128, + crate_fn: conv::__fixsfti, + sys_fn: __fixsfti, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_f64_i32, + sig: (a: f64) -> i32, + crate_fn: conv::__fixdfsi, + sys_fn: __fixdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcvtzs {ret:w}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_i64, + sig: (a: f64) -> i64, + crate_fn: conv::__fixdfdi, + sys_fn: __fixdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i64; + asm!( + "fcvtzs {ret:x}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_i128, + sig: (a: f64) -> i128, + crate_fn: conv::__fixdfti, + sys_fn: __fixdfti, + sys_available: all(), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i32, + sig: (a: f128) -> i32, + crate_fn: conv::__fixtfsi, + crate_fn_ppc: conv::__fixkfsi, + sys_fn: __fixtfsi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! { + name: conv_f128_i64, + sig: (a: f128) -> i64, + crate_fn: conv::__fixtfdi, + crate_fn_ppc: conv::__fixkfdi, + sys_fn: __fixtfdi, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: conv_f128_i128, + sig: (a: f128) -> i128, + crate_fn: conv::__fixtfti, + crate_fn_ppc: conv::__fixkfti, + sys_fn: __fixtfti, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [] +} + +pub fn float_conv() { + let mut criterion = Criterion::default().configure_from_args(); + + conv_u32_f32(&mut criterion); + conv_u32_f64(&mut criterion); + conv_u64_f32(&mut criterion); + conv_u64_f64(&mut criterion); + conv_u128_f32(&mut criterion); + conv_u128_f64(&mut criterion); + conv_i32_f32(&mut criterion); + conv_i32_f64(&mut criterion); + conv_i64_f32(&mut criterion); + conv_i64_f64(&mut criterion); + conv_i128_f32(&mut criterion); + conv_i128_f64(&mut criterion); + conv_f64_u32(&mut criterion); + conv_f64_u64(&mut criterion); + conv_f64_u128(&mut criterion); + conv_f64_i32(&mut criterion); + conv_f64_i64(&mut criterion); + conv_f64_i128(&mut criterion); + + #[cfg(f128_enabled)] + // FIXME: ppc64le has a sporadic overflow panic in the crate functions + // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639> + #[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] + { + conv_u32_f128(&mut criterion); + conv_u64_f128(&mut criterion); + conv_u128_f128(&mut criterion); + conv_i32_f128(&mut criterion); + conv_i64_f128(&mut criterion); + conv_i128_f128(&mut criterion); + conv_f128_u32(&mut criterion); + conv_f128_u64(&mut criterion); + conv_f128_u128(&mut criterion); + conv_f128_i32(&mut criterion); + conv_f128_i64(&mut criterion); + conv_f128_i128(&mut criterion); + } +} + +criterion_main!(float_conv); diff --git a/library/compiler-builtins/builtins-test/benches/float_div.rs b/library/compiler-builtins/builtins-test/benches/float_div.rs new file mode 100644 index 00000000000..d5b0ad0fd40 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_div.rs @@ -0,0 +1,93 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::div; +use criterion::{Criterion, criterion_main}; + +float_bench! { + name: div_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: div::__divsf3, + sys_fn: __divsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "divss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fdiv {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: div_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: div::__divdf3, + sys_fn: __divdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "divsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fdiv {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: div_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: div::__divtf3, + crate_fn_ppc: div::__divkf3, + sys_fn: __divtf3, + sys_fn_ppc: __divkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_div() { + let mut criterion = Criterion::default().configure_from_args(); + + div_f32(&mut criterion); + div_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + div_f128(&mut criterion); + } +} + +criterion_main!(float_div); diff --git a/library/compiler-builtins/builtins-test/benches/float_extend.rs b/library/compiler-builtins/builtins-test/benches/float_extend.rs new file mode 100644 index 00000000000..fc44e80c9e1 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_extend.rs @@ -0,0 +1,133 @@ +#![allow(unused_variables)] // "unused" f16 registers +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] + +use builtins_test::float_bench; +use compiler_builtins::float::extend; +use criterion::{Criterion, criterion_main}; + +#[cfg(f16_enabled)] +float_bench! { + name: extend_f16_f32, + sig: (a: f16) -> f32, + crate_fn: extend::__extendhfsf2, + sys_fn: __extendhfsf2, + sys_available: not(feature = "no-sys-f16"), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "fcvt {ret:s}, {a:h}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f16_enabled)] +float_bench! { + name: extend_f16_f64, + sig: (a: f16) -> f64, + crate_fn: extend::__extendhfdf2, + sys_fn: __extendhfdf2, + sys_available: not(feature = "no-sys-f16-f64-convert"), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "fcvt {ret:d}, {a:h}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(all(f16_enabled, f128_enabled))] +float_bench! { + name: extend_f16_f128, + sig: (a: f16) -> f128, + crate_fn: extend::__extendhftf2, + crate_fn_ppc: extend::__extendhfkf2, + sys_fn: __extendhftf2, + sys_fn_ppc: __extendhfkf2, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [], +} + +float_bench! { + name: extend_f32_f64, + sig: (a: f32) -> f64, + crate_fn: extend::__extendsfdf2, + sys_fn: __extendsfdf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "fcvt {ret:d}, {a:s}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: extend_f32_f128, + sig: (a: f32) -> f128, + crate_fn: extend::__extendsftf2, + crate_fn_ppc: extend::__extendsfkf2, + sys_fn: __extendsftf2, + sys_fn_ppc: __extendsfkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: extend_f64_f128, + sig: (a: f64) -> f128, + crate_fn: extend::__extenddftf2, + crate_fn_ppc: extend::__extenddfkf2, + sys_fn: __extenddftf2, + sys_fn_ppc: __extenddfkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +pub fn float_extend() { + let mut criterion = Criterion::default().configure_from_args(); + + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + #[cfg(f16_enabled)] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + { + extend_f16_f32(&mut criterion); + extend_f16_f64(&mut criterion); + + #[cfg(f128_enabled)] + extend_f16_f128(&mut criterion); + } + + extend_f32_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + extend_f32_f128(&mut criterion); + extend_f64_f128(&mut criterion); + } +} + +criterion_main!(float_extend); diff --git a/library/compiler-builtins/builtins-test/benches/float_mul.rs b/library/compiler-builtins/builtins-test/benches/float_mul.rs new file mode 100644 index 00000000000..a7a2d34aa04 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_mul.rs @@ -0,0 +1,93 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::mul; +use criterion::{Criterion, criterion_main}; + +float_bench! { + name: mul_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: mul::__mulsf3, + sys_fn: __mulsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "mulss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fmul {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: mul_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: mul::__muldf3, + sys_fn: __muldf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "mulsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fmul {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: mul_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: mul::__multf3, + crate_fn_ppc: mul::__mulkf3, + sys_fn: __multf3, + sys_fn_ppc: __mulkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_mul() { + let mut criterion = Criterion::default().configure_from_args(); + + mul_f32(&mut criterion); + mul_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + mul_f128(&mut criterion); + } +} + +criterion_main!(float_mul); diff --git a/library/compiler-builtins/builtins-test/benches/float_pow.rs b/library/compiler-builtins/builtins-test/benches/float_pow.rs new file mode 100644 index 00000000000..64e37dd3241 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_pow.rs @@ -0,0 +1,49 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::pow; +use criterion::{Criterion, criterion_main}; + +float_bench! { + name: powi_f32, + sig: (a: f32, b: i32) -> f32, + crate_fn: pow::__powisf2, + sys_fn: __powisf2, + sys_available: all(), + asm: [], +} + +float_bench! 
{ + name: powi_f64, + sig: (a: f64, b: i32) -> f64, + crate_fn: pow::__powidf2, + sys_fn: __powidf2, + sys_available: all(), + asm: [], +} + +// FIXME(f16_f128): can be changed to only `f128_enabled` once `__multf3` and `__divtf3` are +// distributed by nightly. +#[cfg(all(f128_enabled, not(feature = "no-sys-f128")))] +float_bench! { + name: powi_f128, + sig: (a: f128, b: i32) -> f128, + crate_fn: pow::__powitf2, + crate_fn_ppc: pow::__powikf2, + sys_fn: __powitf2, + sys_fn_ppc: __powikf2, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_pow() { + let mut criterion = Criterion::default().configure_from_args(); + + powi_f32(&mut criterion); + powi_f64(&mut criterion); + + #[cfg(all(f128_enabled, not(feature = "no-sys-f128")))] + powi_f128(&mut criterion); +} + +criterion_main!(float_pow); diff --git a/library/compiler-builtins/builtins-test/benches/float_sub.rs b/library/compiler-builtins/builtins-test/benches/float_sub.rs new file mode 100644 index 00000000000..8bae294cd56 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_sub.rs @@ -0,0 +1,93 @@ +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::float_bench; +use compiler_builtins::float::sub; +use criterion::{Criterion, criterion_main}; + +float_bench! { + name: sub_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: sub::__subsf3, + sys_fn: __subsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "subss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fsub {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: sub_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: sub::__subdf3, + sys_fn: __subdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "subsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fsub {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +#[cfg(f128_enabled)] +float_bench! { + name: sub_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: sub::__subtf3, + crate_fn_ppc: sub::__subkf3, + sys_fn: __subtf3, + sys_fn_ppc: __subkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +pub fn float_sub() { + let mut criterion = Criterion::default().configure_from_args(); + + sub_f32(&mut criterion); + sub_f64(&mut criterion); + + #[cfg(f128_enabled)] + { + sub_f128(&mut criterion); + } +} + +criterion_main!(float_sub); diff --git a/library/compiler-builtins/builtins-test/benches/float_trunc.rs b/library/compiler-builtins/builtins-test/benches/float_trunc.rs new file mode 100644 index 00000000000..43310c7cfc8 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/float_trunc.rs @@ -0,0 +1,146 @@ +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] + +use builtins_test::float_bench; +use compiler_builtins::float::trunc; +use criterion::{Criterion, criterion_main}; + +#[cfg(f16_enabled)] +float_bench! 
{ + name: trunc_f32_f16, + sig: (a: f32) -> f16, + crate_fn: trunc::__truncsfhf2, + sys_fn: __truncsfhf2, + sys_available: not(feature = "no-sys-f16"), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f16; + asm!( + "fcvt {ret:h}, {a:s}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(f16_enabled)] +float_bench! { + name: trunc_f64_f16, + sig: (a: f64) -> f16, + crate_fn: trunc::__truncdfhf2, + sys_fn: __truncdfhf2, + sys_available: not(feature = "no-sys-f16-f64-convert"), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f16; + asm!( + "fcvt {ret:h}, {a:d}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: trunc_f64_f32, + sig: (a: f64) -> f32, + crate_fn: trunc::__truncdfsf2, + sys_fn: __truncdfsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsd2ss {ret}, {a}", + a = in(xmm_reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "fcvt {ret:s}, {a:d}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(all(f16_enabled, f128_enabled))] +float_bench! { + name: trunc_f128_f16, + sig: (a: f128) -> f16, + crate_fn: trunc::__trunctfhf2, + crate_fn_ppc: trunc::__trunckfhf2, + sys_fn: __trunctfhf2, + sys_fn_ppc: __trunckfhf2, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [], +} + +#[cfg(f128_enabled)] +float_bench! { + name: trunc_f128_f32, + sig: (a: f128) -> f32, + crate_fn: trunc::__trunctfsf2, + crate_fn_ppc: trunc::__trunckfsf2, + sys_fn: __trunctfsf2, + sys_fn_ppc: __trunckfsf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +#[cfg(f128_enabled)] +float_bench! 
{ + name: trunc_f128_f64, + sig: (a: f128) -> f64, + crate_fn: trunc::__trunctfdf2, + crate_fn_ppc: trunc::__trunckfdf2, + sys_fn: __trunctfdf2, + sys_fn_ppc: __trunckfdf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +pub fn float_trunc() { + let mut criterion = Criterion::default().configure_from_args(); + + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + #[cfg(f16_enabled)] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + { + trunc_f32_f16(&mut criterion); + trunc_f64_f16(&mut criterion); + } + + trunc_f64_f32(&mut criterion); + + #[cfg(f128_enabled)] + { + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + #[cfg(f16_enabled)] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + trunc_f128_f16(&mut criterion); + + trunc_f128_f32(&mut criterion); + trunc_f128_f64(&mut criterion); + } +} + +criterion_main!(float_trunc); diff --git a/library/compiler-builtins/builtins-test/benches/mem.rs b/library/compiler-builtins/builtins-test/benches/mem.rs new file mode 100644 index 00000000000..3f83926b6c5 --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/mem.rs @@ -0,0 +1,364 @@ +#![feature(test)] + +extern crate test; +use test::{Bencher, black_box}; + +extern crate compiler_builtins; +use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; + +const WORD_SIZE: usize = core::mem::size_of::<usize>(); + +struct AlignedVec { + vec: Vec<usize>, + size: usize, +} + +impl AlignedVec { + fn new(fill: u8, size: usize) -> Self { + let mut broadcast = fill as usize; + let mut bits = 8; + while bits < WORD_SIZE * 8 { + broadcast |= broadcast << bits; + bits *= 2; + } + + let vec = vec![broadcast; (size + WORD_SIZE - 1) & !WORD_SIZE]; + AlignedVec { vec, size } + } +} + +impl core::ops::Deref for AlignedVec { + type Target = [u8]; + fn deref(&self) -> &[u8] { + unsafe { core::slice::from_raw_parts(self.vec.as_ptr() as *const u8, self.size) } + } +} + +impl core::ops::DerefMut for AlignedVec { + fn deref_mut(&mut self) -> &mut [u8] { + unsafe { core::slice::from_raw_parts_mut(self.vec.as_mut_ptr() as *mut u8, self.size) } + } +} + +fn memcpy_builtin(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) { + let v1 = AlignedVec::new(1, n + offset1); + let mut v2 = AlignedVec::new(0, n + offset2); + b.bytes = n as u64; + b.iter(|| { + let src: &[u8] = black_box(&v1[offset1..]); + let dst: &mut [u8] = black_box(&mut v2[offset2..]); + dst.copy_from_slice(src); + }) +} + +fn memcpy_rust(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) { + let v1 = AlignedVec::new(1, n + offset1); + let mut v2 = AlignedVec::new(0, n + offset2); + b.bytes = n as u64; + b.iter(|| { + let src: &[u8] = black_box(&v1[offset1..]); + let dst: &mut [u8] = black_box(&mut v2[offset2..]); + unsafe { memcpy(dst.as_mut_ptr(), src.as_ptr(), n) } + }) +} + +fn memset_builtin(b: &mut Bencher, n: usize, offset: usize) { + let mut v1 = AlignedVec::new(0, n + offset); + b.bytes = n as u64; + b.iter(|| { + let dst: &mut [u8] = black_box(&mut v1[offset..]); + let val: u8 = black_box(27); + for b in dst { + *b = val; + } + }) +} + +fn memset_rust(b: &mut Bencher, n: usize, offset: usize) { + let mut v1 = AlignedVec::new(0, n + offset); + b.bytes = n as u64; + b.iter(|| { + let dst: &mut [u8] = black_box(&mut v1[offset..]); + let val = black_box(27); + unsafe { memset(dst.as_mut_ptr(), val, n) } + }) +} + +fn memcmp_builtin(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, 
n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1); + let s2: &[u8] = black_box(&v2); + s1.cmp(s2) + }) +} + +fn memcmp_builtin_unaligned(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1[0..]); + let s2: &[u8] = black_box(&v2[1..]); + s1.cmp(s2) + }) +} + +fn memcmp_rust(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1); + let s2: &[u8] = black_box(&v2); + unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n) } + }) +} + +fn memcmp_rust_unaligned(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1[0..]); + let s2: &[u8] = black_box(&v2[1..]); + unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n - 1) } + }) +} + +fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) { + let mut v = AlignedVec::new(0, n + n / 2 + offset); + b.bytes = n as u64; + b.iter(|| { + let s: &mut [u8] = black_box(&mut v); + s.copy_within(0..n, n / 2 + offset); + }) +} + +fn memmove_rust(b: &mut Bencher, n: usize, offset: usize) { + let mut v = AlignedVec::new(0, n + n / 2 + offset); + b.bytes = n as u64; + b.iter(|| { + let dst: *mut u8 = black_box(&mut v[n / 2 + offset..]).as_mut_ptr(); + let src: *const u8 = black_box(&v).as_ptr(); + unsafe { memmove(dst, src, n) }; + }) +} + +#[bench] +fn memcpy_builtin_4096(b: &mut Bencher) { + memcpy_builtin(b, 4096, 0, 0) +} +#[bench] +fn memcpy_rust_4096(b: &mut Bencher) { + memcpy_rust(b, 4096, 0, 0) +} +#[bench] +fn memcpy_builtin_1048576(b: &mut Bencher) { + memcpy_builtin(b, 1048576, 0, 0) +} +#[bench] +fn memcpy_rust_1048576(b: &mut Bencher) { + memcpy_rust(b, 1048576, 0, 0) +} +#[bench] +fn memcpy_builtin_4096_offset(b: &mut Bencher) { + memcpy_builtin(b, 4096, 65, 65) +} +#[bench] +fn memcpy_rust_4096_offset(b: &mut Bencher) { + memcpy_rust(b, 4096, 65, 65) +} +#[bench] +fn memcpy_builtin_1048576_offset(b: &mut Bencher) { + memcpy_builtin(b, 1048576, 65, 65) +} +#[bench] +fn memcpy_rust_1048576_offset(b: &mut Bencher) { + memcpy_rust(b, 1048576, 65, 65) +} +#[bench] +fn memcpy_builtin_4096_misalign(b: &mut Bencher) { + memcpy_builtin(b, 4096, 65, 66) +} +#[bench] +fn memcpy_rust_4096_misalign(b: &mut Bencher) { + memcpy_rust(b, 4096, 65, 66) +} +#[bench] +fn memcpy_builtin_1048576_misalign(b: &mut Bencher) { + memcpy_builtin(b, 1048576, 65, 66) +} +#[bench] +fn memcpy_rust_1048576_misalign(b: &mut Bencher) { + memcpy_rust(b, 1048576, 65, 66) +} + +#[bench] +fn memset_builtin_4096(b: &mut Bencher) { + memset_builtin(b, 4096, 0) +} +#[bench] +fn memset_rust_4096(b: &mut Bencher) { + memset_rust(b, 4096, 0) +} +#[bench] +fn memset_builtin_1048576(b: &mut Bencher) { + memset_builtin(b, 1048576, 0) +} +#[bench] +fn memset_rust_1048576(b: &mut Bencher) { + memset_rust(b, 1048576, 0) +} +#[bench] +fn memset_builtin_4096_offset(b: &mut Bencher) { + memset_builtin(b, 4096, 65) +} +#[bench] +fn memset_rust_4096_offset(b: &mut Bencher) { + memset_rust(b, 4096, 65) +} +#[bench] +fn memset_builtin_1048576_offset(b: &mut Bencher) { + memset_builtin(b, 1048576, 65) +} +#[bench] +fn memset_rust_1048576_offset(b: &mut Bencher) { + memset_rust(b, 1048576, 65) +} + +#[bench] +fn memcmp_builtin_8(b: &mut Bencher) { + memcmp_builtin(b, 8) +} +#[bench] +fn 
memcmp_rust_8(b: &mut Bencher) { + memcmp_rust(b, 8) +} +#[bench] +fn memcmp_builtin_16(b: &mut Bencher) { + memcmp_builtin(b, 16) +} +#[bench] +fn memcmp_rust_16(b: &mut Bencher) { + memcmp_rust(b, 16) +} +#[bench] +fn memcmp_builtin_32(b: &mut Bencher) { + memcmp_builtin(b, 32) +} +#[bench] +fn memcmp_rust_32(b: &mut Bencher) { + memcmp_rust(b, 32) +} +#[bench] +fn memcmp_builtin_64(b: &mut Bencher) { + memcmp_builtin(b, 64) +} +#[bench] +fn memcmp_rust_64(b: &mut Bencher) { + memcmp_rust(b, 64) +} +#[bench] +fn memcmp_builtin_4096(b: &mut Bencher) { + memcmp_builtin(b, 4096) +} +#[bench] +fn memcmp_rust_4096(b: &mut Bencher) { + memcmp_rust(b, 4096) +} +#[bench] +fn memcmp_builtin_1048576(b: &mut Bencher) { + memcmp_builtin(b, 1048576) +} +#[bench] +fn memcmp_rust_1048576(b: &mut Bencher) { + memcmp_rust(b, 1048576) +} +#[bench] +fn memcmp_builtin_unaligned_7(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 8) +} +#[bench] +fn memcmp_rust_unaligned_7(b: &mut Bencher) { + memcmp_rust_unaligned(b, 8) +} +#[bench] +fn memcmp_builtin_unaligned_15(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 16) +} +#[bench] +fn memcmp_rust_unaligned_15(b: &mut Bencher) { + memcmp_rust_unaligned(b, 16) +} +#[bench] +fn memcmp_builtin_unaligned_31(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 32) +} +#[bench] +fn memcmp_rust_unaligned_31(b: &mut Bencher) { + memcmp_rust_unaligned(b, 32) +} +#[bench] +fn memcmp_builtin_unaligned_63(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 64) +} +#[bench] +fn memcmp_rust_unaligned_63(b: &mut Bencher) { + memcmp_rust_unaligned(b, 64) +} +#[bench] +fn memcmp_builtin_unaligned_4095(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 4096) +} +#[bench] +fn memcmp_rust_unaligned_4095(b: &mut Bencher) { + memcmp_rust_unaligned(b, 4096) +} +#[bench] +fn memcmp_builtin_unaligned_1048575(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 1048576) +} +#[bench] +fn memcmp_rust_unaligned_1048575(b: &mut Bencher) { + memcmp_rust_unaligned(b, 1048576) +} + +#[bench] +fn memmove_builtin_4096(b: &mut Bencher) { + memmove_builtin(b, 4096, 0) +} +#[bench] +fn memmove_rust_4096(b: &mut Bencher) { + memmove_rust(b, 4096, 0) +} +#[bench] +fn memmove_builtin_1048576(b: &mut Bencher) { + memmove_builtin(b, 1048576, 0) +} +#[bench] +fn memmove_rust_1048576(b: &mut Bencher) { + memmove_rust(b, 1048576, 0) +} +#[bench] +fn memmove_builtin_4096_misalign(b: &mut Bencher) { + memmove_builtin(b, 4096, 1) +} +#[bench] +fn memmove_rust_4096_misalign(b: &mut Bencher) { + memmove_rust(b, 4096, 1) +} +#[bench] +fn memmove_builtin_1048576_misalign(b: &mut Bencher) { + memmove_builtin(b, 1048576, 1) +} +#[bench] +fn memmove_rust_1048576_misalign(b: &mut Bencher) { + memmove_rust(b, 1048576, 1) +} diff --git a/library/compiler-builtins/builtins-test/benches/mem_icount.rs b/library/compiler-builtins/builtins-test/benches/mem_icount.rs new file mode 100644 index 00000000000..63045f6e1ec --- /dev/null +++ b/library/compiler-builtins/builtins-test/benches/mem_icount.rs @@ -0,0 +1,499 @@ +//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This +//! is stable enough to be tested in CI. 
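+//!
+//! (Editor's note, not part of the commit: these benches are gated behind the
+//! `icount` cargo feature and require valgrind to be installed; a typical
+//! invocation, assuming the feature and bench names from Cargo.toml above,
+//! would be `cargo bench -p builtins-test --features icount --bench mem_icount`.)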
+ +use std::hint::black_box; +use std::{ops, slice}; + +use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; +use iai_callgrind::{library_benchmark, library_benchmark_group, main}; + +const PAGE_SIZE: usize = 0x1000; // 4 kiB +const MAX_ALIGN: usize = 512; // assume we may use avx512 operations one day +const MEG1: usize = 1 << 20; // 1 MiB + +#[derive(Clone)] +#[repr(C, align(0x1000))] +struct Page([u8; PAGE_SIZE]); + +/// A buffer that is page-aligned by default, with an optional offset to create a +/// misalignment. +struct AlignedSlice { + buf: Box<[Page]>, + len: usize, + offset: usize, +} + +impl AlignedSlice { + /// Allocate a slice aligned to ALIGN with at least `len` items, with `offset` from + /// page alignment. + fn new_zeroed(len: usize, offset: usize) -> Self { + assert!(offset < PAGE_SIZE); + let total_len = len + offset; + let items = (total_len / PAGE_SIZE) + if total_len % PAGE_SIZE > 0 { 1 } else { 0 }; + let buf = vec![Page([0u8; PAGE_SIZE]); items].into_boxed_slice(); + AlignedSlice { buf, len, offset } + } +} + +impl ops::Deref for AlignedSlice { + type Target = [u8]; + fn deref(&self) -> &Self::Target { + unsafe { slice::from_raw_parts(self.buf.as_ptr().cast::<u8>().add(self.offset), self.len) } + } +} + +impl ops::DerefMut for AlignedSlice { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { + slice::from_raw_parts_mut( + self.buf.as_mut_ptr().cast::<u8>().add(self.offset), + self.len, + ) + } + } +} + +mod mcpy { + use super::*; + + struct Cfg { + len: usize, + s_off: usize, + d_off: usize, + } + + fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { + let Cfg { len, s_off, d_off } = cfg; + println!("bytes: {len} bytes, src offset: {s_off}, dst offset: {d_off}"); + let mut src = AlignedSlice::new_zeroed(len, s_off); + let dst = AlignedSlice::new_zeroed(len, d_off); + src.fill(1); + (len, src, dst) + } + + #[library_benchmark] + #[benches::aligned( + // Both aligned + args = [ + Cfg { len: 16, s_off: 0, d_off: 0 }, + Cfg { len: 32, s_off: 0, d_off: 0 }, + Cfg { len: 64, s_off: 0, d_off: 0 }, + Cfg { len: 512, s_off: 0, d_off: 0 }, + Cfg { len: 4096, s_off: 0, d_off: 0 }, + Cfg { len: MEG1, s_off: 0, d_off: 0 }, + ], + setup = setup, + )] + #[benches::offset( + // Both at the same offset + args = [ + Cfg { len: 16, s_off: 65, d_off: 65 }, + Cfg { len: 32, s_off: 65, d_off: 65 }, + Cfg { len: 64, s_off: 65, d_off: 65 }, + Cfg { len: 512, s_off: 65, d_off: 65 }, + Cfg { len: 4096, s_off: 65, d_off: 65 }, + Cfg { len: MEG1, s_off: 65, d_off: 65 }, + ], + setup = setup, + )] + #[benches::misaligned( + // `src` and `dst` both misaligned by different amounts + args = [ + Cfg { len: 16, s_off: 65, d_off: 66 }, + Cfg { len: 32, s_off: 65, d_off: 66 }, + Cfg { len: 64, s_off: 65, d_off: 66 }, + Cfg { len: 512, s_off: 65, d_off: 66 }, + Cfg { len: 4096, s_off: 65, d_off: 66 }, + Cfg { len: MEG1, s_off: 65, d_off: 66 }, + ], + setup = setup, + )] + fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) { + unsafe { + black_box(memcpy( + black_box(dst.as_mut_ptr()), + black_box(src.as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memcpy; benchmarks = bench); +} + +mod mset { + use super::*; + + struct Cfg { + len: usize, + offset: usize, + } + + fn setup(Cfg { len, offset }: Cfg) -> (usize, AlignedSlice) { + println!("bytes: {len}, offset: {offset}"); + (len, AlignedSlice::new_zeroed(len, offset)) + } + + #[library_benchmark] + #[benches::aligned( + args = [ + Cfg { len: 16, offset: 0 }, + Cfg { len: 32, offset: 
0 }, + Cfg { len: 64, offset: 0 }, + Cfg { len: 512, offset: 0 }, + Cfg { len: 4096, offset: 0 }, + Cfg { len: MEG1, offset: 0 }, + ], + setup = setup, + )] + #[benches::offset( + args = [ + Cfg { len: 16, offset: 65 }, + Cfg { len: 32, offset: 65 }, + Cfg { len: 64, offset: 65 }, + Cfg { len: 512, offset: 65 }, + Cfg { len: 4096, offset: 65 }, + Cfg { len: MEG1, offset: 65 }, + ], + setup = setup, + )] + fn bench((len, mut dst): (usize, AlignedSlice)) { + unsafe { + black_box(memset( + black_box(dst.as_mut_ptr()), + black_box(27), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memset; benchmarks = bench); +} + +mod mcmp { + use super::*; + + struct Cfg { + len: usize, + s_off: usize, + d_off: usize, + } + + fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) { + let Cfg { len, s_off, d_off } = cfg; + println!("bytes: {len}, src offset: {s_off}, dst offset: {d_off}"); + let b1 = AlignedSlice::new_zeroed(len, s_off); + let mut b2 = AlignedSlice::new_zeroed(len, d_off); + b2[len - 1] = 1; + (len, b1, b2) + } + + #[library_benchmark] + #[benches::aligned( + // Both aligned + args = [ + Cfg { len: 16, s_off: 0, d_off: 0 }, + Cfg { len: 32, s_off: 0, d_off: 0 }, + Cfg { len: 64, s_off: 0, d_off: 0 }, + Cfg { len: 512, s_off: 0, d_off: 0 }, + Cfg { len: 4096, s_off: 0, d_off: 0 }, + Cfg { len: MEG1, s_off: 0, d_off: 0 }, + ], + setup = setup + )] + #[benches::offset( + // Both at the same offset + args = [ + Cfg { len: 16, s_off: 65, d_off: 65 }, + Cfg { len: 32, s_off: 65, d_off: 65 }, + Cfg { len: 64, s_off: 65, d_off: 65 }, + Cfg { len: 512, s_off: 65, d_off: 65 }, + Cfg { len: 4096, s_off: 65, d_off: 65 }, + Cfg { len: MEG1, s_off: 65, d_off: 65 }, + ], + setup = setup + )] + #[benches::misaligned( + // `src` and `dst` both misaligned by different amounts + args = [ + Cfg { len: 16, s_off: 65, d_off: 66 }, + Cfg { len: 32, s_off: 65, d_off: 66 }, + Cfg { len: 64, s_off: 65, d_off: 66 }, + Cfg { len: 512, s_off: 65, d_off: 66 }, + Cfg { len: 4096, s_off: 65, d_off: 66 }, + Cfg { len: MEG1, s_off: 65, d_off: 66 }, + ], + setup = setup + )] + fn bench((len, mut dst, src): (usize, AlignedSlice, AlignedSlice)) { + unsafe { + black_box(memcmp( + black_box(dst.as_mut_ptr()), + black_box(src.as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memcmp; benchmarks = bench); +} + +mod mmove { + use super::*; + use Spread::{Aligned, Large, Medium, Small}; + + struct Cfg { + len: usize, + spread: Spread, + off: usize, + } + + enum Spread { + /// `src` and `dst` are close and have the same alignment (or offset). + Aligned, + /// `src` and `dst` are close. + Small, + /// `src` and `dst` are halfway offset in the buffer. + Medium, + /// `src` and `dst` only overlap by a single byte. 
+ Large, + } + + // Note that small and large are + fn calculate_spread(len: usize, spread: Spread) -> usize { + match spread { + // Note that this test doesn't make sense for lengths less than len=128 + Aligned => { + assert!(len > MAX_ALIGN, "aligned memset would have no overlap"); + MAX_ALIGN + } + Small => 1, + Medium => (len / 2) + 1, // add 1 so all are misaligned + Large => len - 1, + } + } + + fn setup_forward(cfg: Cfg) -> (usize, usize, AlignedSlice) { + let Cfg { len, spread, off } = cfg; + let spread = calculate_spread(len, spread); + println!("bytes: {len}, spread: {spread}, offset: {off}, forward"); + assert!(spread < len, "memmove tests should have some overlap"); + let mut buf = AlignedSlice::new_zeroed(len + spread, off); + let mut fill: usize = 0; + buf[..len].fill_with(|| { + fill += 1; + fill as u8 + }); + (len, spread, buf) + } + + fn setup_backward(cfg: Cfg) -> (usize, usize, AlignedSlice) { + let Cfg { len, spread, off } = cfg; + let spread = calculate_spread(len, spread); + println!("bytes: {len}, spread: {spread}, offset: {off}, backward"); + assert!(spread < len, "memmove tests should have some overlap"); + let mut buf = AlignedSlice::new_zeroed(len + spread, off); + let mut fill: usize = 0; + buf[spread..].fill_with(|| { + fill += 1; + fill as u8 + }); + (len, spread, buf) + } + + #[library_benchmark] + #[benches::aligned( + args = [ + // Don't test small spreads since there is no overlap + Cfg { len: 4096, spread: Aligned, off: 0 }, + Cfg { len: MEG1, spread: Aligned, off: 0 }, + ], + setup = setup_forward + )] + #[benches::small_spread( + args = [ + Cfg { len: 16, spread: Small, off: 0 }, + Cfg { len: 32, spread: Small, off: 0 }, + Cfg { len: 64, spread: Small, off: 0 }, + Cfg { len: 512, spread: Small, off: 0 }, + Cfg { len: 4096, spread: Small, off: 0 }, + Cfg { len: MEG1, spread: Small, off: 0 }, + ], + setup = setup_forward + )] + #[benches::medium_spread( + args = [ + Cfg { len: 16, spread: Medium, off: 0 }, + Cfg { len: 32, spread: Medium, off: 0 }, + Cfg { len: 64, spread: Medium, off: 0 }, + Cfg { len: 512, spread: Medium, off: 0 }, + Cfg { len: 4096, spread: Medium, off: 0 }, + Cfg { len: MEG1, spread: Medium, off: 0 }, + ], + setup = setup_forward + )] + #[benches::large_spread( + args = [ + Cfg { len: 16, spread: Large, off: 0 }, + Cfg { len: 32, spread: Large, off: 0 }, + Cfg { len: 64, spread: Large, off: 0 }, + Cfg { len: 512, spread: Large, off: 0 }, + Cfg { len: 4096, spread: Large, off: 0 }, + Cfg { len: MEG1, spread: Large, off: 0 }, + ], + setup = setup_forward + )] + #[benches::aligned_off( + args = [ + Cfg { len: 4096, spread: Aligned, off: 65 }, + Cfg { len: MEG1, spread: Aligned, off: 65 }, + ], + setup = setup_forward + )] + #[benches::small_spread_off( + args = [ + Cfg { len: 16, spread: Small, off: 65 }, + Cfg { len: 32, spread: Small, off: 65 }, + Cfg { len: 64, spread: Small, off: 65 }, + Cfg { len: 512, spread: Small, off: 65 }, + Cfg { len: 4096, spread: Small, off: 65 }, + Cfg { len: MEG1, spread: Small, off: 65 }, + ], + setup = setup_forward + )] + #[benches::medium_spread_off( + args = [ + Cfg { len: 16, spread: Medium, off: 65 }, + Cfg { len: 32, spread: Medium, off: 65 }, + Cfg { len: 64, spread: Medium, off: 65 }, + Cfg { len: 512, spread: Medium, off: 65 }, + Cfg { len: 4096, spread: Medium, off: 65 }, + Cfg { len: MEG1, spread: Medium, off: 65 }, + ], + setup = setup_forward + )] + #[benches::large_spread_off( + args = [ + Cfg { len: 16, spread: Large, off: 65 }, + Cfg { len: 32, spread: Large, off: 65 }, + Cfg { len: 64, 
spread: Large, off: 65 }, + Cfg { len: 512, spread: Large, off: 65 }, + Cfg { len: 4096, spread: Large, off: 65 }, + Cfg { len: MEG1, spread: Large, off: 65 }, + ], + setup = setup_forward + )] + fn forward((len, spread, mut buf): (usize, usize, AlignedSlice)) { + // Test moving from the start of the buffer toward the end + unsafe { + black_box(memmove( + black_box(buf[spread..].as_mut_ptr()), + black_box(buf.as_ptr()), + black_box(len), + )); + } + } + + #[library_benchmark] + #[benches::aligned( + args = [ + // Don't test small spreads since there is no overlap + Cfg { len: 4096, spread: Aligned, off: 0 }, + Cfg { len: MEG1, spread: Aligned, off: 0 }, + ], + setup = setup_backward + )] + #[benches::small_spread( + args = [ + Cfg { len: 16, spread: Small, off: 0 }, + Cfg { len: 32, spread: Small, off: 0 }, + Cfg { len: 64, spread: Small, off: 0 }, + Cfg { len: 512, spread: Small, off: 0 }, + Cfg { len: 4096, spread: Small, off: 0 }, + Cfg { len: MEG1, spread: Small, off: 0 }, + ], + setup = setup_backward + )] + #[benches::medium_spread( + args = [ + Cfg { len: 16, spread: Medium, off: 0 }, + Cfg { len: 32, spread: Medium, off: 0 }, + Cfg { len: 64, spread: Medium, off: 0 }, + Cfg { len: 512, spread: Medium, off: 0 }, + Cfg { len: 4096, spread: Medium, off: 0 }, + Cfg { len: MEG1, spread: Medium, off: 0 }, + ], + setup = setup_backward + )] + #[benches::large_spread( + args = [ + Cfg { len: 16, spread: Large, off: 0 }, + Cfg { len: 32, spread: Large, off: 0 }, + Cfg { len: 64, spread: Large, off: 0 }, + Cfg { len: 512, spread: Large, off: 0 }, + Cfg { len: 4096, spread: Large, off: 0 }, + Cfg { len: MEG1, spread: Large, off: 0 }, + ], + setup = setup_backward + )] + #[benches::aligned_off( + args = [ + // Don't test small spreads since there is no overlap + Cfg { len: 4096, spread: Aligned, off: 65 }, + Cfg { len: MEG1, spread: Aligned, off: 65 }, + ], + setup = setup_backward + )] + #[benches::small_spread_off( + args = [ + Cfg { len: 16, spread: Small, off: 65 }, + Cfg { len: 32, spread: Small, off: 65 }, + Cfg { len: 64, spread: Small, off: 65 }, + Cfg { len: 512, spread: Small, off: 65 }, + Cfg { len: 4096, spread: Small, off: 65 }, + Cfg { len: MEG1, spread: Small, off: 65 }, + ], + setup = setup_backward + )] + #[benches::medium_spread_off( + args = [ + Cfg { len: 16, spread: Medium, off: 65 }, + Cfg { len: 32, spread: Medium, off: 65 }, + Cfg { len: 64, spread: Medium, off: 65 }, + Cfg { len: 512, spread: Medium, off: 65 }, + Cfg { len: 4096, spread: Medium, off: 65 }, + Cfg { len: MEG1, spread: Medium, off: 65 }, + ], + setup = setup_backward + )] + #[benches::large_spread_off( + args = [ + Cfg { len: 16, spread: Large, off: 65 }, + Cfg { len: 32, spread: Large, off: 65 }, + Cfg { len: 64, spread: Large, off: 65 }, + Cfg { len: 512, spread: Large, off: 65 }, + Cfg { len: 4096, spread: Large, off: 65 }, + Cfg { len: MEG1, spread: Large, off: 65 }, + ], + setup = setup_backward + )] + fn backward((len, spread, mut buf): (usize, usize, AlignedSlice)) { + // Test moving from the end of the buffer toward the start + unsafe { + black_box(memmove( + black_box(buf.as_mut_ptr()), + black_box(buf[spread..].as_ptr()), + black_box(len), + )); + } + } + + library_benchmark_group!(name = memmove; benchmarks = forward, backward); +} + +use mcmp::memcmp; +use mcpy::memcpy; +use mmove::memmove; +use mset::memset; + +main!(library_benchmark_groups = memcpy, memset, memcmp, memmove); diff --git a/library/compiler-builtins/builtins-test/build.rs b/library/compiler-builtins/builtins-test/build.rs new 
file mode 100644 index 00000000000..e8f4eb4dd22 --- /dev/null +++ b/library/compiler-builtins/builtins-test/build.rs @@ -0,0 +1,120 @@ +use std::collections::HashSet; + +mod builtins_configure { + include!("../compiler-builtins/configure.rs"); +} + +/// Features to enable +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +enum Feature { + NoSysF128, + NoSysF128IntConvert, + NoSysF16, + NoSysF16F64Convert, + NoSysF16F128Convert, +} + +impl Feature { + fn implies(self) -> &'static [Self] { + match self { + Self::NoSysF128 => [Self::NoSysF128IntConvert, Self::NoSysF16F128Convert].as_slice(), + Self::NoSysF128IntConvert => [].as_slice(), + Self::NoSysF16 => [Self::NoSysF16F64Convert, Self::NoSysF16F128Convert].as_slice(), + Self::NoSysF16F64Convert => [].as_slice(), + Self::NoSysF16F128Convert => [].as_slice(), + } + } +} + +fn main() { + println!("cargo::rerun-if-changed=../configure.rs"); + + let target = builtins_configure::Target::from_env(); + let mut features = HashSet::new(); + + // These platforms do not have f128 symbols available in their system libraries, so + // skip related tests. + if target.arch == "arm" + || target.vendor == "apple" + || target.env == "msvc" + // GCC and LLVM disagree on the ABI of `f16` and `f128` with MinGW. See + // <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054>. + || (target.os == "windows" && target.env == "gnu") + // FIXME(llvm): There is an ABI incompatibility between GCC and Clang on 32-bit x86. + // See <https://github.com/llvm/llvm-project/issues/77401>. + || target.arch == "x86" + // 32-bit PowerPC and 64-bit LE get code generated that Qemu cannot handle. See + // <https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105635926>. + || target.arch == "powerpc" + || target.arch == "powerpc64le" + // FIXME: We get different results from the builtin functions. See + // <https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105657287>. + || target.arch == "powerpc64" + { + features.insert(Feature::NoSysF128); + } + + if target.arch == "x86" { + // 32-bit x86 does not have `__fixunstfti`/`__fixtfti` but does have everything else + features.insert(Feature::NoSysF128IntConvert); + // FIXME: 32-bit x86 has a bug in `f128 -> f16` system libraries + features.insert(Feature::NoSysF16F128Convert); + } + + // These platforms do not have f16 symbols available in their system libraries, so + // skip related tests. Most of these are missing `f16 <-> f32` conversion routines. + if (target.arch == "aarch64" && target.os == "linux") + || target.arch.starts_with("arm") + || target.arch == "powerpc" + || target.arch == "powerpc64" + || target.arch == "powerpc64le" + || target.arch == "loongarch64" + || (target.arch == "x86" && !target.has_feature("sse")) + || target.os == "windows" + // Linking says "error: function signature mismatch: __extendhfsf2" and seems to + // think the signature is either `(i32) -> f32` or `(f32) -> f32`. See + // <https://github.com/llvm/llvm-project/issues/96438>. + || target.arch == "wasm32" + || target.arch == "wasm64" + { + features.insert(Feature::NoSysF16); + } + + // These platforms are missing either `__extendhfdf2` or `__truncdfhf2`. + if target.vendor == "apple" || target.os == "windows" { + features.insert(Feature::NoSysF16F64Convert); + } + + // Add implied features. Collecting into a `Vec` first is required so that `features` is + // not borrowed while it is being extended.
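+ // For example, if the target checks above inserted `NoSysF16`, this step also enables
+ // `NoSysF16F64Convert` and `NoSysF16F128Convert`, per `Feature::implies` above.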
+ features.extend( + features + .iter() + .flat_map(|x| x.implies()) + .copied() + .collect::<Vec<_>>(), + ); + + for feature in features { + let (name, warning) = match feature { + Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"), + Feature::NoSysF128IntConvert => ( + "no-sys-f128-int-convert", + "using apfloat fallback for f128 <-> int conversions", + ), + Feature::NoSysF16F64Convert => ( + "no-sys-f16-f64-convert", + "using apfloat fallback for f16 <-> f64 conversions", + ), + Feature::NoSysF16F128Convert => ( + "no-sys-f16-f128-convert", + "using apfloat fallback for f16 <-> f128 conversions", + ), + Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"), + }; + println!("cargo:warning={warning}"); + println!("cargo:rustc-cfg=feature=\"{name}\""); + } + + builtins_configure::configure_aliases(&target); + builtins_configure::configure_f16_f128(&target); +} diff --git a/library/compiler-builtins/builtins-test/src/bench.rs b/library/compiler-builtins/builtins-test/src/bench.rs new file mode 100644 index 00000000000..45a3a1ad467 --- /dev/null +++ b/library/compiler-builtins/builtins-test/src/bench.rs @@ -0,0 +1,366 @@ +use core::cell::RefCell; + +use alloc::vec::Vec; +use compiler_builtins::float::Float; + +/// Fuzz with this many items to check that functions are equal +pub const CHECK_ITER_ITEMS: u32 = 10_000; +/// Benchmark with this many items to get a variety +pub const BENCH_ITER_ITEMS: u32 = 500; + +/// Still run benchmarks/tests but don't check correctness between compiler-builtins and +/// builtin system functions +pub fn skip_sys_checks(test_name: &str) -> bool { + const ALWAYS_SKIPPED: &[&str] = &[ + // FIXME(f16_f128): system symbols have incorrect results + // <https://github.com/rust-lang/compiler-builtins/issues/617> + "extend_f16_f32", + "trunc_f32_f16", + "trunc_f64_f16", + // FIXME(#616): re-enable once fix is in nightly + // <https://github.com/rust-lang/compiler-builtins/issues/616> + "mul_f32", + "mul_f64", + ]; + + // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely + // in their benchmark modules due to runtime panics. + // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639> + const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"]; + + // FIXME(f16_f128): system symbols have incorrect results + // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639> + const X86_NO_SSE_SKIPPED: &[&str] = &[ + "add_f128", "sub_f128", "mul_f128", "div_f128", "powi_f32", "powi_f64", + ]; + + // FIXME(f16_f128): Wide multiply carry bug in `compiler-rt`, re-enable when nightly no longer + // uses the `compiler-rt` version.
+ // <https://github.com/llvm/llvm-project/issues/91840> + const AARCH64_SKIPPED: &[&str] = &["mul_f128", "div_f128"]; + + // FIXME(llvm): system symbols have incorrect results on Windows + // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807> + const WINDOWS_SKIPPED: &[&str] = &[ + "conv_f32_u128", + "conv_f32_i128", + "conv_f64_u128", + "conv_f64_i128", + ]; + + if cfg!(target_arch = "arm") { + // The Arm symbols need a different ABI that our macro doesn't handle, just skip it + return true; + } + + if ALWAYS_SKIPPED.contains(&test_name) { + return true; + } + + if cfg!(all(target_arch = "powerpc64", target_endian = "little")) + && PPC64LE_SKIPPED.contains(&test_name) + { + return true; + } + + if cfg!(all(target_arch = "x86", not(target_feature = "sse"))) + && X86_NO_SSE_SKIPPED.contains(&test_name) + { + return true; + } + + if cfg!(target_arch = "aarch64") && AARCH64_SKIPPED.contains(&test_name) { + return true; + } + + if cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name) { + return true; + } + + false +} + +/// Still run benchmarks/tests but don't check correctness between compiler-builtins and +/// assembly functions +pub fn skip_asm_checks(_test_name: &str) -> bool { + // Nothing to skip at this time + false +} + +/// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten +/// assembly. +/// +/// # Safety +/// +/// The signature must be correct and any assembly must be sound. +#[macro_export] +macro_rules! float_bench { + ( + // Name of this benchmark + name: $name:ident, + // The function signature to be tested + sig: ($($arg:ident: $arg_ty:ty),*) -> $ret_ty:ty, + // Path to the crate in compiler_builtins + crate_fn: $crate_fn:path, + // Optional alias on ppc + $( crate_fn_ppc: $crate_fn_ppc:path, )? + // Name of the system symbol + sys_fn: $sys_fn:ident, + // Optional alias on ppc + $( sys_fn_ppc: $sys_fn_ppc:path, )? + // Meta saying whether the system symbol is available + sys_available: $sys_available:meta, + // An optional function to validate the results of two functions are equal, if not + // just `$ret_ty::check_eq` + $( output_eq: $output_eq:expr, )? + // Assembly implementations, if any. + asm: [ + $( + #[cfg($asm_meta:meta)] { + $($asm_tt:tt)* + } + );* + $(;)? + ] + $(,)? + ) => {paste::paste! { + // SAFETY: macro invocation must use the correct signature + #[cfg($sys_available)] + unsafe extern "C" { + /// Binding for the system function + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + + + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + float_bench! { @coalesce_fn $($sys_fn_ppc)? 
=> + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + } + } + + fn $name(c: &mut Criterion) { + use core::hint::black_box; + use compiler_builtins::float::Float; + use $crate::bench::TestIO; + + #[inline(never)] // equalize with external calls + fn crate_fn($($arg: $arg_ty),*) -> $ret_ty { + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + let target_crate_fn = $crate_fn; + + // On PPC, use an alias if specified + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + let target_crate_fn = float_bench!(@coalesce $($crate_fn_ppc)?, $crate_fn); + + target_crate_fn( $($arg),* ) + } + + #[inline(always)] // already a branch + #[cfg($sys_available)] + fn sys_fn($($arg: $arg_ty),*) -> $ret_ty { + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + let target_sys_fn = $sys_fn; + + // On PPC, use an alias if specified + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + let target_sys_fn = float_bench!(@coalesce $($sys_fn_ppc)?, $sys_fn); + + unsafe { target_sys_fn( $($arg),* ) } + } + + #[inline(never)] // equalize with external calls + #[cfg(any( $($asm_meta),* ))] + fn asm_fn($(mut $arg: $arg_ty),*) -> $ret_ty { + use core::arch::asm; + $( + #[cfg($asm_meta)] + unsafe { $($asm_tt)* } + )* + } + + let testvec = <($($arg_ty),*)>::make_testvec($crate::bench::CHECK_ITER_ITEMS); + let benchvec = <($($arg_ty),*)>::make_testvec($crate::bench::BENCH_ITER_ITEMS); + let test_name = stringify!($name); + let check_eq = float_bench!(@coalesce $($output_eq)?, $ret_ty::check_eq); + + // Verify math lines up. We run the crate functions even if we don't validate the + // output here to make sure there are no panics or crashes. + + #[cfg($sys_available)] + for ($($arg),*) in testvec.iter().copied() { + let crate_res = crate_fn($($arg),*); + let sys_res = sys_fn($($arg),*); + + if $crate::bench::skip_sys_checks(test_name) { + continue; + } + + assert!( + check_eq(crate_res, sys_res), + "{test_name}{:?}: crate: {crate_res:?}, sys: {sys_res:?}", + ($($arg),* ,) + ); + } + + #[cfg(any( $($asm_meta),* ))] + { + for ($($arg),*) in testvec.iter().copied() { + let crate_res = crate_fn($($arg),*); + let asm_res = asm_fn($($arg),*); + + if $crate::bench::skip_asm_checks(test_name) { + continue; + } + + assert!( + check_eq(crate_res, asm_res), + "{test_name}{:?}: crate: {crate_res:?}, asm: {asm_res:?}", + ($($arg),* ,) + ); + } + } + + let mut group = c.benchmark_group(test_name); + group.bench_function("compiler-builtins", |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(crate_fn( $(black_box($arg)),* )); + } + })); + + #[cfg($sys_available)] + group.bench_function("system", |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(sys_fn( $(black_box($arg)),* )); + } + })); + + #[cfg(any( $($asm_meta),* ))] + group.bench_function(&format!( + "assembly ({} {})", std::env::consts::ARCH, std::env::consts::FAMILY + ), |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(asm_fn( $(black_box($arg)),* )); + } + })); + + group.finish(); + } + }}; + + // Allow overriding a default + (@coalesce $specified:expr, $default:expr) => { $specified }; + (@coalesce, $default:expr) => { $default }; + + // Allow overriding a function name + (@coalesce_fn $specified:ident => fn $default_name:ident $($tt:tt)+) => { + fn $specified $($tt)+ + }; + (@coalesce_fn => fn $default_name:ident $($tt:tt)+) => { + fn $default_name $($tt)+ + }; +} + +/// A type used as either an input or output to/from a benchmark function. 
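+///
+/// For example, the tuple `(f32, f32)` is the fuzzed input pair for a binary operation such
+/// as `__addsf3`, while plain `f32` is the compared output; the `impl_testio!` macro below
+/// provides both (for tuples, `check_eq` is `unimplemented!`, since they are inputs only).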
+pub trait TestIO: Sized { + fn make_testvec(len: u32) -> Vec<Self>; + fn check_eq(a: Self, b: Self) -> bool; +} + +macro_rules! impl_testio { + (float $($f_ty:ty),+) => {$( + impl TestIO for $f_ty { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + Float::eq_repr(a, b) + } + } + + impl TestIO for ($f_ty, $f_ty) { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + + (int $($i_ty:ty),+) => {$( + impl TestIO for $i_ty { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + a == b + } + } + + impl TestIO for ($i_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + + ((float, int) ($f_ty:ty, $i_ty:ty)) => { + impl TestIO for ($f_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ivec = RefCell::new(Vec::new()); + let fvec = RefCell::new(Vec::new()); + + crate::fuzz(len.isqrt(), |a| ivec.borrow_mut().push(a)); + crate::fuzz_float(len.isqrt(), |a| fvec.borrow_mut().push(a)); + + let mut ret = Vec::new(); + let ivec = ivec.into_inner(); + let fvec = fvec.into_inner(); + + for f in fvec { + for i in &ivec { + ret.push((f, *i)); + } + } + + ret + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + } +} + +#[cfg(f16_enabled)] +impl_testio!(float f16); +impl_testio!(float f32, f64); +#[cfg(f128_enabled)] +impl_testio!(float f128); +impl_testio!(int i16, i32, i64, i128); +impl_testio!(int u16, u32, u64, u128); +impl_testio!((float, int)(f32, i32)); +impl_testio!((float, int)(f64, i32)); +#[cfg(f128_enabled)] +impl_testio!((float, int)(f128, i32)); diff --git a/library/compiler-builtins/builtins-test/src/lib.rs b/library/compiler-builtins/builtins-test/src/lib.rs new file mode 100644 index 00000000000..a83aea56206 --- /dev/null +++ b/library/compiler-builtins/builtins-test/src/lib.rs @@ -0,0 +1,338 @@ +//! This crate is for integration testing and fuzz testing of functions in `compiler-builtins`. This +//! includes publicly documented intrinsics and some internal alternative implementation functions +//! such as `usize_leading_zeros_riscv` (which are tested because they are configured for +//! architectures not tested by the CI). +//! +//! The general idea is to use a combination of edge case testing and randomized fuzz testing. The +//! edge case testing is crucial for checking cases like where both inputs are equal or equal to +//! special values such as `i128::MIN`, which is unlikely for the random fuzzer by itself to +//! encounter. The randomized fuzz testing is specially designed to cover wide swaths of search +//! space in as few iterations as possible. See `fuzz_values` in `builtins-test/tests/misc.rs` for +//! an example. +//! +//! 
Some floating point tests are disabled for specific architectures because they do not have +//! correct rounding. +#![no_std] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] + +pub mod bench; +extern crate alloc; + +use compiler_builtins::float::Float; +use compiler_builtins::int::{Int, MinInt}; + +use rand_xoshiro::Xoshiro128StarStar; +use rand_xoshiro::rand_core::{RngCore, SeedableRng}; + +/// Sets the number of fuzz iterations run for most tests. In practice, the vast majority of bugs +/// are caught by the edge case testers. Most of the remaining bugs triggered by more complex +/// sequences are caught well within 10_000 fuzz iterations. For classes of algorithms like division +/// that are vulnerable to rare edge cases, we want 1_000_000 iterations to be more confident. In +/// practical CI, however, we only want to run the more strenuous test once to catch algorithmic +/// level bugs, and run the 10_000 iteration test on most targets. Target-dependent bugs are likely +/// to involve miscompilation and misconfiguration that is likely to break algorithms in quickly +/// caught ways. We choose `N = 1_000_000` iterations for `x86_64` targets, which are likely to +/// have fast hardware, but only when debug assertions are disabled (tests without `--release` +/// would take too long), and run `N = 10_000` for all other targets. +pub const N: u32 = if cfg!(target_arch = "x86_64") && !cfg!(debug_assertions) { + 1_000_000 +} else { + 10_000 +}; + +/// Random fuzzing step. When run several times, it results in excellent fuzzing entropy such as: +/// 11110101010101011110111110011111 +/// 10110101010100001011101011001010 +/// 1000000000000000 +/// 10000000000000110111110000001010 +/// 1111011111111101010101111110101 +/// 101111111110100000000101000000 +/// 10000000110100000000100010101 +/// 1010101010101000 +fn fuzz_step<I: Int>(rng: &mut Xoshiro128StarStar, x: &mut I) { + let ones = !I::ZERO; + let bit_indexing_mask: u32 = I::BITS - 1; + // It happens that all the RNG we need can come from one call. 7 bits are needed to index a + // worst case 128 bit integer, and there are 4 indexes that need to be made plus 4 bits for + // selecting operations + let rng32 = rng.next_u32(); + + // Randomly OR, AND, and XOR randomly sized and shifted continuous strings of + // ones with `lhs` and `rhs`. + let r0 = bit_indexing_mask & rng32; + let r1 = bit_indexing_mask & (rng32 >> 7); + let mask = ones.wrapping_shl(r0).rotate_left(r1); + match (rng32 >> 14) % 4 { + 0 => *x |= mask, + 1 => *x &= mask, + // both 2 and 3 to make XORs as common as ORs and ANDs combined + _ => *x ^= mask, + } + + // Alternating ones and zeros (e.g. 0b1010101010101010). This catches second-order + // problems that might occur for algorithms with two modes of operation (potentially + // there is some invariant that can be broken and maintained via alternating between modes, + // breaking the algorithm when it reaches the end). + let mut alt_ones = I::ONE; + for _ in 0..(I::BITS / 2) { + alt_ones <<= 2; + alt_ones |= I::ONE; + } + let r0 = bit_indexing_mask & (rng32 >> 16); + let r1 = bit_indexing_mask & (rng32 >> 23); + let mask = alt_ones.wrapping_shl(r0).rotate_left(r1); + match rng32 >> 30 { + 0 => *x |= mask, + 1 => *x &= mask, + _ => *x ^= mask, + } +} + +// We need macros like this because `#![no_std]` prevents us from using iterators +macro_rules! 
edge_cases { + ($I:ident, $case:ident, $inner:block) => { + for i0 in 0..$I::FUZZ_NUM { + let mask_lo = (!$I::UnsignedInt::ZERO).wrapping_shr($I::FUZZ_LENGTHS[i0] as u32); + for i1 in i0..I::FUZZ_NUM { + let mask_hi = + (!$I::UnsignedInt::ZERO).wrapping_shl($I::FUZZ_LENGTHS[i1 - i0] as u32); + let $case = I::from_unsigned(mask_lo & mask_hi); + $inner + } + } + }; +} + +/// Feeds a series of fuzzing inputs to `f`. The fuzzer first uses an algorithm designed to find +/// edge cases, followed by a more random fuzzer that runs `n` times. +pub fn fuzz<I: Int, F: FnMut(I)>(n: u32, mut f: F) +where + <I as MinInt>::UnsignedInt: Int, +{ + // edge case tester. Calls `f` 210 times for u128. + // zero gets skipped by the loop + f(I::ZERO); + edge_cases!(I, case, { + f(case); + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x: I = MinInt::ZERO; + for _ in 0..n { + fuzz_step(&mut rng, &mut x); + f(x) + } +} + +/// The same as `fuzz`, except `f` has two inputs. +pub fn fuzz_2<I: Int, F: Fn(I, I)>(n: u32, f: F) +where + <I as MinInt>::UnsignedInt: Int, +{ + // Check cases where the first and second inputs are zero. Both call `f` 210 times for `u128`. + edge_cases!(I, case, { + f(I::ZERO, case); + }); + edge_cases!(I, case, { + f(case, I::ZERO); + }); + // Nested edge tester. Calls `f` 44100 times for `u128`. + edge_cases!(I, case0, { + edge_cases!(I, case1, { + f(case0, case1); + }) + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x: I = I::ZERO; + let mut y: I = I::ZERO; + for _ in 0..n { + fuzz_step(&mut rng, &mut x); + fuzz_step(&mut rng, &mut y); + f(x, y) + } +} + +/// Tester for shift functions +pub fn fuzz_shift<I: Int, F: Fn(I, u32)>(f: F) { + // Shift functions are very simple and do not need anything other than shifting a small + // set of random patterns for every fuzz length. + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x: I = MinInt::ZERO; + for i in 0..I::FUZZ_NUM { + fuzz_step(&mut rng, &mut x); + f(x, MinInt::ZERO); + f(x, I::FUZZ_LENGTHS[i] as u32); + } +} + +fn fuzz_float_step<F: Float>(rng: &mut Xoshiro128StarStar, f: &mut F) { + let rng32 = rng.next_u32(); + // we need to fuzz the different parts of the float separately, because the masking on larger + // significands will tend to set the exponent to all ones or all zeros frequently + + // sign bit fuzzing + let sign = (rng32 & 1) != 0; + + // exponent fuzzing. Only 4 bits for the selector needed. + let ones = (F::Int::ONE << F::EXP_BITS) - F::Int::ONE; + let r0 = (rng32 >> 1) % F::EXP_BITS; + let r1 = (rng32 >> 5) % F::EXP_BITS; + // custom rotate shift. Note that `F::Int` is unsigned, so we can shift right without smearing + // the sign bit. + let mask = if r1 == 0 { + ones.wrapping_shr(r0) + } else { + let tmp = ones.wrapping_shr(r0); + (tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXP_BITS - r1)) & ones + }; + let mut exp = (f.to_bits() & F::EXP_MASK) >> F::SIG_BITS; + match (rng32 >> 9) % 4 { + 0 => exp |= mask, + 1 => exp &= mask, + _ => exp ^= mask, + } + + // significand fuzzing + let mut sig = f.to_bits() & F::SIG_MASK; + fuzz_step(rng, &mut sig); + sig &= F::SIG_MASK; + + *f = F::from_parts(sign, exp, sig); +} + +macro_rules! 
float_edge_cases { + ($F:ident, $case:ident, $inner:block) => { + for exponent in [ + F::Int::ZERO, + F::Int::ONE, + F::Int::ONE << (F::EXP_BITS / 2), + (F::Int::ONE << (F::EXP_BITS - 1)) - F::Int::ONE, + F::Int::ONE << (F::EXP_BITS - 1), + (F::Int::ONE << (F::EXP_BITS - 1)) + F::Int::ONE, + (F::Int::ONE << F::EXP_BITS) - F::Int::ONE, + ] + .iter() + { + for significand in [ + F::Int::ZERO, + F::Int::ONE, + F::Int::ONE << (F::SIG_BITS / 2), + (F::Int::ONE << (F::SIG_BITS - 1)) - F::Int::ONE, + F::Int::ONE << (F::SIG_BITS - 1), + (F::Int::ONE << (F::SIG_BITS - 1)) + F::Int::ONE, + (F::Int::ONE << F::SIG_BITS) - F::Int::ONE, + ] + .iter() + { + for sign in [false, true].iter() { + let $case = F::from_parts(*sign, *exponent, *significand); + $inner + } + } + } + }; +} + +pub fn fuzz_float<F: Float, E: Fn(F)>(n: u32, f: E) { + float_edge_cases!(F, case, { + f(case); + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x = F::ZERO; + for _ in 0..n { + fuzz_float_step(&mut rng, &mut x); + f(x); + } +} + +pub fn fuzz_float_2<F: Float, E: Fn(F, F)>(n: u32, f: E) { + float_edge_cases!(F, case0, { + float_edge_cases!(F, case1, { + f(case0, case1); + }); + }); + + // random fuzzer + let mut rng = Xoshiro128StarStar::seed_from_u64(0); + let mut x = F::ZERO; + let mut y = F::ZERO; + for _ in 0..n { + fuzz_float_step(&mut rng, &mut x); + fuzz_float_step(&mut rng, &mut y); + f(x, y) + } +} + +/// Perform an operation using builtin types if available, falling back to apfloat if not. +#[macro_export] +macro_rules! apfloat_fallback { + ( + $float_ty:ty, + // Type name in `rustc_apfloat::ieee`. Not a full path, it automatically gets the prefix. + $apfloat_ty:ident, + // Cfg expression for when builtin system operations should be used + $sys_available:meta, + // The expression to run. This expression may use `FloatTy` for its signature. + // Optionally, the final conversion back to a float can be suppressed using + // `=> no_convert` (for e.g. operations that return a bool). + // + // If the apfloat needs a different operation, it can be provided here. + $op:expr $(=> $convert:ident)? $(; $apfloat_op:expr)?, + // Arguments that get passed to `$op` after converting to a float + $($arg:expr),+ + $(,)? + ) => {{ + #[cfg($sys_available)] + let ret = { + type FloatTy = $float_ty; + $op( $($arg),+ ) + }; + + #[cfg(not($sys_available))] + let ret = { + use rustc_apfloat::Float; + type FloatTy = rustc_apfloat::ieee::$apfloat_ty; + + apfloat_fallback!(@inner + fty: $float_ty, + // Apply a conversion to `FloatTy` to each arg, then pass all args to `$op` + op_res: $op( $(FloatTy::from_bits($arg.to_bits().into())),+ ), + $(apfloat_op: $apfloat_op, )? + $(conv_opts: $convert,)? + args: $($arg),+ + ) + }; + + ret + }}; + + // Operations that do not need converting back to a float + (@inner fty: $float_ty:ty, op_res: $val:expr, conv_opts: no_convert, args: $($_arg:expr),+) => { + $val + }; + + // Some apfloat operations return a `StatusAnd` that we need to extract the value from. This + // is the default. + (@inner fty: $float_ty:ty, op_res: $val:expr, args: $($_arg:expr),+) => {{ + // ignore the status, just get the value + let unwrapped = $val.value; + + <$float_ty>::from_bits(FloatTy::to_bits(unwrapped).try_into().unwrap()) + }}; + + // This is the case where we can't use the same expression for the default builtin and + // nonstandard apfloat fallback (e.g. `as` casts in std are normal functions in apfloat, so + // two separate expressions must be specified. 
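+ // A hypothetical invocation of that shape, modeled on the conversion tests in
+ // `tests/conv.rs`: `apfloat_fallback!(f32, Single, all(), |x| x as i32; |x: f32|
+ // fallback_cast(x), x)`, where `fallback_cast` stands in for an apfloat-based routine.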
+ (@inner + fty: $float_ty:ty, op_res: $_val:expr, + apfloat_op: $apfloat_op:expr, args: $($arg:expr),+ + ) => {{ + $apfloat_op($($arg),+) + }}; +} diff --git a/library/compiler-builtins/builtins-test/tests/addsub.rs b/library/compiler-builtins/builtins-test/tests/addsub.rs new file mode 100644 index 00000000000..865b9e472ab --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/addsub.rs @@ -0,0 +1,143 @@ +#![allow(unused_macros)] +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::*; + +mod int_addsub { + use super::*; + + macro_rules! sum { + ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => { + $( + #[test] + fn $fn_add() { + use compiler_builtins::int::addsub::{$fn_add, $fn_sub}; + + fuzz_2(N, |x: $i, y: $i| { + let add0 = x.wrapping_add(y); + let sub0 = x.wrapping_sub(y); + let add1: $i = $fn_add(x, y); + let sub1: $i = $fn_sub(x, y); + if add0 != add1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_add), x, y, add0, add1 + ); + } + if sub0 != sub1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_sub), x, y, sub0, sub1 + ); + } + }); + } + )* + }; + } + + macro_rules! overflowing_sum { + ($($i:ty, $fn_add:ident, $fn_sub:ident);*;) => { + $( + #[test] + fn $fn_add() { + use compiler_builtins::int::addsub::{$fn_add, $fn_sub}; + + fuzz_2(N, |x: $i, y: $i| { + let (add0, add_o0)= x.overflowing_add(y); + let (sub0, sub_o0)= x.overflowing_sub(y); + let mut add_o1 = 0; + let mut sub_o1 = 0; + let add1: $i = $fn_add(x, y, &mut add_o1); + let sub1: $i = $fn_sub(x, y, &mut sub_o1); + if add0 != add1 || i32::from(add_o0) != add_o1 { + panic!( + "{}({}, {}): std: {:?}, builtins: {:?}", + stringify!($fn_add), x, y, (add0, add_o0) , (add1, add_o1) + ); + } + if sub0 != sub1 || i32::from(sub_o0) != sub_o1 { + panic!( + "{}({}, {}): std: {:?}, builtins: {:?}", + stringify!($fn_sub), x, y, (sub0, sub_o0) , (sub1, sub_o1) + ); + } + }); + } + )* + }; + } + + // Integer addition and subtraction is very simple, so 100 fuzzing passes should be plenty. + sum! { + u128, __rust_u128_add, __rust_u128_sub; + i128, __rust_i128_add, __rust_i128_sub; + } + + overflowing_sum! { + u128, __rust_u128_addo, __rust_u128_subo; + i128, __rust_i128_addo, __rust_i128_subo; + } +} + +macro_rules! float_sum { + ($($f:ty, $fn_add:ident, $fn_sub:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { + $( + #[test] + fn $fn_add() { + use core::ops::{Add, Sub}; + use compiler_builtins::float::{{add::$fn_add, sub::$fn_sub}, Float}; + + fuzz_float_2(N, |x: $f, y: $f| { + let add0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Add::add, x, y); + let sub0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Sub::sub, x, y); + let add1: $f = $fn_add(x, y); + let sub1: $f = $fn_sub(x, y); + if !Float::eq_repr(add0, add1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn_add), x, y, add0, add1 + ); + } + if !Float::eq_repr(sub0, sub1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn_sub), x, y, sub0, sub1 + ); + } + }); + } + )* + } +} + +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +mod float_addsub { + use super::*; + + float_sum! { + f32, __addsf3, __subsf3, Single, all(); + f64, __adddf3, __subdf3, Double, all(); + } +} + +#[cfg(f128_enabled)] +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +mod float_addsub_f128 { + use super::*; + + float_sum! 
{ + f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128"); + } +} + +#[cfg(f128_enabled)] +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +mod float_addsub_f128_ppc { + use super::*; + + float_sum! { + f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128"); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/aeabi_memclr.rs b/library/compiler-builtins/builtins-test/tests/aeabi_memclr.rs new file mode 100644 index 00000000000..bfd15a391aa --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/aeabi_memclr.rs @@ -0,0 +1,60 @@ +#![cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem" +))] +#![feature(compiler_builtins_lib)] +#![no_std] + +extern crate compiler_builtins; + +// test runner +extern crate utest_cortex_m_qemu; + +// overrides `panic!` +#[macro_use] +extern crate utest_macros; + +use core::mem; + +macro_rules! panic { + ($($tt:tt)*) => { + upanic!($($tt)*); + }; +} + +extern "C" { + fn __aeabi_memclr4(dest: *mut u8, n: usize); + fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32); +} + +struct Aligned { + array: [u8; 8], + _alignment: [u32; 0], +} + +impl Aligned { + fn new() -> Self { + Aligned { + array: [0; 8], + _alignment: [], + } + } +} + +#[test] +fn memclr4() { + let mut aligned = Aligned::new(); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + + for n in 0..9 { + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, 0xff); + __aeabi_memclr4(xs.as_mut_ptr(), n); + } + + assert!(xs[0..n].iter().all(|x| *x == 0)); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/aeabi_memcpy.rs b/library/compiler-builtins/builtins-test/tests/aeabi_memcpy.rs new file mode 100644 index 00000000000..c892c5aba0f --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/aeabi_memcpy.rs @@ -0,0 +1,71 @@ +#![cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem" +))] +#![feature(compiler_builtins_lib)] +#![no_std] + +extern crate compiler_builtins; + +// test runner +extern crate utest_cortex_m_qemu; + +// overrides `panic!` +#[macro_use] +extern crate utest_macros; + +macro_rules! 
panic { + ($($tt:tt)*) => { + upanic!($($tt)*); + }; +} + +extern "C" { + fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize); + fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize); +} + +struct Aligned { + array: [u8; 8], + _alignment: [u32; 0], +} + +impl Aligned { + fn new(array: [u8; 8]) -> Self { + Aligned { + array: array, + _alignment: [], + } + } +} + +#[test] +fn memcpy() { + let mut dest = [0; 4]; + let src = [0xde, 0xad, 0xbe, 0xef]; + + for n in 0..dest.len() { + dest.copy_from_slice(&[0; 4]); + + unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) } + + assert_eq!(&dest[0..n], &src[0..n]) + } +} + +#[test] +fn memcpy4() { + let mut aligned = Aligned::new([0; 8]); + let dest = &mut aligned.array; + let src = [0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d]; + + for n in 0..dest.len() { + dest.copy_from_slice(&[0; 8]); + + unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) } + + assert_eq!(&dest[0..n], &src[0..n]) + } +} diff --git a/library/compiler-builtins/builtins-test/tests/aeabi_memset.rs b/library/compiler-builtins/builtins-test/tests/aeabi_memset.rs new file mode 100644 index 00000000000..34ab3acc78c --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/aeabi_memset.rs @@ -0,0 +1,240 @@ +#![cfg(all( + target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem" +))] +#![feature(compiler_builtins_lib)] +#![no_std] + +extern crate compiler_builtins; + +// test runner +extern crate utest_cortex_m_qemu; + +// overrides `panic!` +#[macro_use] +extern crate utest_macros; + +use core::mem; + +macro_rules! panic { + ($($tt:tt)*) => { + upanic!($($tt)*); + }; +} + +extern "C" { + fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32); +} + +struct Aligned { + array: [u8; 8], + _alignment: [u32; 0], +} + +impl Aligned { + fn new(array: [u8; 8]) -> Self { + Aligned { + array: array, + _alignment: [], + } + } +} + +#[test] +fn zero() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), 0, c) } + + assert_eq!(*xs, [0; 8]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), 0, c) } + + assert_eq!(*xs, [1; 8]); +} + +#[test] +fn one() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 1; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0, 0, 0, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 1, 1, 1, 1, 1, 1, 1]); +} + +#[test] +fn two() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 2; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0, 0, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 1, 1, 1, 1, 1, 1]); +} + +#[test] +fn three() { + let mut aligned = 
Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 3; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 1, 1, 1, 1, 1]); +} + +#[test] +fn four() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 4; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 1, 1, 1, 1]); +} + +#[test] +fn five() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 5; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 1, 1, 1]); +} + +#[test] +fn six() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 6; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1, 1]); +} + +#[test] +fn seven() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 7; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1]); +} + +#[test] +fn eight() { + let mut aligned = Aligned::new([0u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 8; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]); + + let mut aligned = Aligned::new([1u8; 8]); + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]); +} diff --git a/library/compiler-builtins/builtins-test/tests/big.rs b/library/compiler-builtins/builtins-test/tests/big.rs new file mode 100644 index 00000000000..d1ae88bd164 --- /dev/null +++ 
b/library/compiler-builtins/builtins-test/tests/big.rs @@ -0,0 +1,134 @@ +use compiler_builtins::int::{HInt, MinInt, i256, u256}; + +const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff; + +/// Print a `u256` as hex since we can't add format implementations +fn hexu(v: u256) -> String { + format!( + "0x{:016x}{:016x}{:016x}{:016x}", + v.0[3], v.0[2], v.0[1], v.0[0] + ) +} + +#[test] +fn widen_u128() { + assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0])); + assert_eq!( + LOHI_SPLIT.widen(), + u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0]) + ); +} + +#[test] +fn widen_i128() { + assert_eq!((-1i128).widen(), u256::MAX.signed()); + assert_eq!( + (LOHI_SPLIT as i128).widen(), + i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX]) + ); + assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen()); +} + +#[test] +fn widen_mul_u128() { + let tests = [ + (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])), + (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])), + (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])), + (u128::MIN, u128::MIN, u256::ZERO), + (1234, 0, u256::ZERO), + (0, 1234, u256::ZERO), + ]; + + let mut errors = Vec::new(); + for (i, (a, b, exp)) in tests.iter().copied().enumerate() { + let res = a.widen_mul(b); + let res_z = a.zero_widen_mul(b); + assert_eq!(res, res_z); + if res != exp { + errors.push((i, a, b, exp, res)); + } + } + + for (i, a, b, exp, res) in &errors { + eprintln!( + "FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}", + hexu(*exp), + hexu(*res) + ); + } + assert!(errors.is_empty()); +} + +#[test] +fn not_u128() { + assert_eq!(!u256::ZERO, u256::MAX); +} + +#[test] +fn shr_u128() { + let only_low = [ + 1, + u16::MAX.into(), + u32::MAX.into(), + u64::MAX.into(), + u128::MAX, + ]; + + let mut errors = Vec::new(); + + for a in only_low { + for perturb in 0..10 { + let a = a.saturating_add(perturb); + for shift in 0..128 { + let res = a.widen() >> shift; + let expected = (a >> shift).widen(); + if res != expected { + errors.push((a.widen(), shift, res, expected)); + } + } + } + } + + let check = [ + ( + u256::MAX, + 1, + u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]), + ), + ( + u256::MAX, + 5, + u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5]), + ), + (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])), + (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])), + (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])), + (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])), + (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])), + (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])), + (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), + (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])), + (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])), + (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])), + (u256::MAX, 254, u256([0b11, 0, 0, 0])), + (u256::MAX, 255, u256([1, 0, 0, 0])), + ]; + + for (input, shift, expected) in check { + let res = input >> shift; + if res != expected { + errors.push((input, shift, res, expected)); + } + } + + for (a, b, res, expected) in &errors { + eprintln!( + "FAILURE: {} >> {b} = {} got {}", + hexu(*a), + hexu(*expected), + hexu(*res), + ); + } + assert!(errors.is_empty()); +} diff --git a/library/compiler-builtins/builtins-test/tests/cmp.rs b/library/compiler-builtins/builtins-test/tests/cmp.rs new file mode 100644 index 00000000000..dbedd213e90 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/cmp.rs @@ -0,0 +1,185 @@ +#![allow(unused_macros)] +#![allow(unreachable_code)] 
+#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::*; + +mod float_comparisons { + use super::*; + + macro_rules! cmp { + ( + $f:ty, $x:ident, $y:ident, $apfloat_ty:ident, $sys_available:meta, + $($unordered_val:expr, $fn:ident);*; + ) => { + $( + let cmp0 = if apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |x: FloatTy| x.is_nan() => no_convert, + $x + ) || apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |y: FloatTy| y.is_nan() => no_convert, + $y + ) + { + $unordered_val + } else if apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |x, y| x < y => no_convert, + $x, $y + ) { + -1 + } else if apfloat_fallback!( + $f, $apfloat_ty, $sys_available, + |x, y| x == y => no_convert, + $x, $y + ) { + 0 + } else { + 1 + }; + + let cmp1 = $fn($x, $y); + if cmp0 != cmp1 { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), $x, $y, cmp0, cmp1 + ); + } + )* + }; + } + + #[test] + fn cmp_f32() { + use compiler_builtins::float::cmp::{ + __eqsf2, __gesf2, __gtsf2, __lesf2, __ltsf2, __nesf2, __unordsf2, + }; + + fuzz_float_2(N, |x: f32, y: f32| { + assert_eq!(__unordsf2(x, y) != 0, x.is_nan() || y.is_nan()); + cmp!(f32, x, y, Single, all(), + 1, __ltsf2; + 1, __lesf2; + 1, __eqsf2; + -1, __gesf2; + -1, __gtsf2; + 1, __nesf2; + ); + }); + } + + #[test] + fn cmp_f64() { + use compiler_builtins::float::cmp::{ + __eqdf2, __gedf2, __gtdf2, __ledf2, __ltdf2, __nedf2, __unorddf2, + }; + + fuzz_float_2(N, |x: f64, y: f64| { + assert_eq!(__unorddf2(x, y) != 0, x.is_nan() || y.is_nan()); + cmp!(f64, x, y, Double, all(), + 1, __ltdf2; + 1, __ledf2; + 1, __eqdf2; + -1, __gedf2; + -1, __gtdf2; + 1, __nedf2; + ); + }); + } + + #[test] + #[cfg(f128_enabled)] + fn cmp_f128() { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + use compiler_builtins::float::cmp::{ + __eqkf2 as __eqtf2, __gekf2 as __getf2, __gtkf2 as __gttf2, __lekf2 as __letf2, + __ltkf2 as __lttf2, __nekf2 as __netf2, __unordkf2 as __unordtf2, + }; + + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + use compiler_builtins::float::cmp::{ + __eqtf2, __getf2, __gttf2, __letf2, __lttf2, __netf2, __unordtf2, + }; + + fuzz_float_2(N, |x: f128, y: f128| { + let x_is_nan = apfloat_fallback!( + f128, Quad, not(feature = "no-sys-f128"), + |x: FloatTy| x.is_nan() => no_convert, + x + ); + let y_is_nan = apfloat_fallback!( + f128, Quad, not(feature = "no-sys-f128"), + |x: FloatTy| x.is_nan() => no_convert, + y + ); + + assert_eq!(__unordtf2(x, y) != 0, x_is_nan || y_is_nan); + + cmp!(f128, x, y, Quad, not(feature = "no-sys-f128"), + 1, __lttf2; + 1, __letf2; + 1, __eqtf2; + -1, __getf2; + -1, __gttf2; + 1, __netf2; + ); + }); + } +} + +#[cfg(target_arch = "arm")] +mod float_comparisons_arm { + use super::*; + + macro_rules! 
cmp2 { + ($x:ident, $y:ident, $($unordered_val:expr, $fn_std:expr, $fn_builtins:ident);*;) => { + $( + let cmp0: i32 = if $x.is_nan() || $y.is_nan() { + $unordered_val + } else { + $fn_std as i32 + }; + let cmp1: i32 = $fn_builtins($x, $y); + if cmp0 != cmp1 { + panic!("{}({}, {}): std: {}, builtins: {}", stringify!($fn_builtins), $x, $y, cmp0, cmp1); + } + )* + }; + } + + #[test] + fn cmp_f32() { + use compiler_builtins::float::cmp::{ + __aeabi_fcmpeq, __aeabi_fcmpge, __aeabi_fcmpgt, __aeabi_fcmple, __aeabi_fcmplt, + }; + + fuzz_float_2(N, |x: f32, y: f32| { + cmp2!(x, y, + 0, x < y, __aeabi_fcmplt; + 0, x <= y, __aeabi_fcmple; + 0, x == y, __aeabi_fcmpeq; + 0, x >= y, __aeabi_fcmpge; + 0, x > y, __aeabi_fcmpgt; + ); + }); + } + + #[test] + fn cmp_f64() { + use compiler_builtins::float::cmp::{ + __aeabi_dcmpeq, __aeabi_dcmpge, __aeabi_dcmpgt, __aeabi_dcmple, __aeabi_dcmplt, + }; + + fuzz_float_2(N, |x: f64, y: f64| { + cmp2!(x, y, + 0, x < y, __aeabi_dcmplt; + 0, x <= y, __aeabi_dcmple; + 0, x == y, __aeabi_dcmpeq; + 0, x >= y, __aeabi_dcmpge; + 0, x > y, __aeabi_dcmpgt; + ); + }); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/conv.rs b/library/compiler-builtins/builtins-test/tests/conv.rs new file mode 100644 index 00000000000..491915d9bb1 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/conv.rs @@ -0,0 +1,364 @@ +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg_attr(f16_enabled, feature(f16))] +// makes configuration easier +#![allow(unused_macros)] +#![allow(unused_imports)] + +use builtins_test::*; +use compiler_builtins::float::Float; +use rustc_apfloat::{Float as _, FloatConvert as _}; + +mod i_to_f { + use super::*; + + macro_rules! i_to_f { + ($f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::float::conv::$fn; + use compiler_builtins::int::Int; + + fuzz(N, |x: $i_ty| { + let f0 = apfloat_fallback!( + $f_ty, $apfloat_ty, $sys_available, + |x| x as $f_ty; + // When the builtin is not available, we need to use a different conversion + // method (since apfloat doesn't support `as` casting). + |x: $i_ty| { + use compiler_builtins::int::MinInt; + + let apf = if <$i_ty>::SIGNED { + FloatTy::from_i128(x.try_into().unwrap()).value + } else { + FloatTy::from_u128(x.try_into().unwrap()).value + }; + + <$f_ty>::from_bits(apf.to_bits()) + }, + x + ); + let f1: $f_ty = $fn(x); + + #[cfg($sys_available)] { + // This makes sure that the conversion produced the best rounding possible, and does + // this independent of `x as $into` rounding correctly. + // This assumes that float to integer conversion is correct. + let y_minus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_sub(1)) as $i_ty; + let y = f1 as $i_ty; + let y_plus_ulp = <$f_ty>::from_bits(f1.to_bits().wrapping_add(1)) as $i_ty; + let error_minus = <$i_ty as Int>::abs_diff(y_minus_ulp, x); + let error = <$i_ty as Int>::abs_diff(y, x); + let error_plus = <$i_ty as Int>::abs_diff(y_plus_ulp, x); + + // The first two conditions check that none of the two closest float values are + // strictly closer in representation to `x`. The second makes sure that rounding is + // towards even significand if two float values are equally close to the integer. 
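+ // Illustration with a hypothetical float of three significand bits: the integer 11 lies
+ // exactly between the representable values 10 (significand 0b101, odd) and 12 (0b110,
+ // even), so a correctly rounded conversion must produce 12.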
+ if error_minus < error + || error_plus < error + || ((error_minus == error || error_plus == error) + && ((f0.to_bits() & 1) != 0)) + { + if !cfg!(any( + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + panic!( + "incorrect rounding by {}({}): {}, ({}, {}, {}), errors ({}, {}, {})", + stringify!($fn), + x, + f1.to_bits(), + y_minus_ulp, + y, + y_plus_ulp, + error_minus, + error, + error_plus, + ); + } + } + } + + // Test against native conversion. We disable testing on all `x86` because of + // rounding bugs with `i686`. `powerpc` also has the same rounding bug. + if !Float::eq_repr(f0, f1) && !cfg!(any( + target_arch = "x86", + target_arch = "powerpc", + target_arch = "powerpc64" + )) { + panic!( + "{}({}): std: {:?}, builtins: {:?}", + stringify!($fn), + x, + f0, + f1, + ); + } + }); + } + )* + }; + } + + i_to_f! { f32, Single, all(), + u32, __floatunsisf; + i32, __floatsisf; + u64, __floatundisf; + i64, __floatdisf; + u128, __floatuntisf; + i128, __floattisf; + } + + i_to_f! { f64, Double, all(), + u32, __floatunsidf; + i32, __floatsidf; + u64, __floatundidf; + i64, __floatdidf; + u128, __floatuntidf; + i128, __floattidf; + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsitf; + i32, __floatsitf; + u64, __floatunditf; + i64, __floatditf; + u128, __floatuntitf; + i128, __floattitf; + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"), + u32, __floatunsikf; + i32, __floatsikf; + u64, __floatundikf; + i64, __floatdikf; + u128, __floatuntikf; + i128, __floattikf; + } +} + +mod f_to_i { + use super::*; + + macro_rules! f_to_i { + ($x:ident, $f_ty:ty, $apfloat_ty:ident, $sys_available:meta, $($i_ty:ty, $fn:ident);*;) => { + $( + // it is undefined behavior in the first place to do conversions with NaNs + if !apfloat_fallback!( + $f_ty, $apfloat_ty, $sys_available, |x: FloatTy| x.is_nan() => no_convert, $x + ) { + let conv0 = apfloat_fallback!( + $f_ty, $apfloat_ty, $sys_available, + // Use an `as` cast when the builtin is available on the system. + |x| x as $i_ty; + // When the builtin is not available, we need to use a different conversion + // method (since apfloat doesn't support `as` casting). 
+ |x: $f_ty| { + use compiler_builtins::int::MinInt; + + let apf = FloatTy::from_bits(x.to_bits().into()); + let bits: usize = <$i_ty>::BITS.try_into().unwrap(); + + let err_fn = || panic!( + "Unable to convert value {x:?} to type {}:", stringify!($i_ty) + ); + + if <$i_ty>::SIGNED { + <$i_ty>::try_from(apf.to_i128(bits).value).ok().unwrap_or_else(err_fn) + } else { + <$i_ty>::try_from(apf.to_u128(bits).value).ok().unwrap_or_else(err_fn) + } + }, + $x + ); + let conv1: $i_ty = $fn($x); + if conv0 != conv1 { + panic!("{}({:?}): std: {:?}, builtins: {:?}", stringify!($fn), $x, conv0, conv1); + } + } + )* + }; + } + + #[test] + fn f32_to_int() { + use compiler_builtins::float::conv::{ + __fixsfdi, __fixsfsi, __fixsfti, __fixunssfdi, __fixunssfsi, __fixunssfti, + }; + + fuzz_float(N, |x: f32| { + f_to_i!(x, f32, Single, all(), + u32, __fixunssfsi; + u64, __fixunssfdi; + u128, __fixunssfti; + i32, __fixsfsi; + i64, __fixsfdi; + i128, __fixsfti; + ); + }); + } + + #[test] + fn f64_to_int() { + use compiler_builtins::float::conv::{ + __fixdfdi, __fixdfsi, __fixdfti, __fixunsdfdi, __fixunsdfsi, __fixunsdfti, + }; + + fuzz_float(N, |x: f64| { + f_to_i!(x, f64, Double, all(), + u32, __fixunsdfsi; + u64, __fixunsdfdi; + u128, __fixunsdfti; + i32, __fixdfsi; + i64, __fixdfdi; + i128, __fixdfti; + ); + }); + } + + #[test] + #[cfg(f128_enabled)] + fn f128_to_int() { + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + use compiler_builtins::float::conv::{ + __fixkfdi as __fixtfdi, __fixkfsi as __fixtfsi, __fixkfti as __fixtfti, + __fixunskfdi as __fixunstfdi, __fixunskfsi as __fixunstfsi, + __fixunskfti as __fixunstfti, + }; + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + use compiler_builtins::float::conv::{ + __fixtfdi, __fixtfsi, __fixtfti, __fixunstfdi, __fixunstfsi, __fixunstfti, + }; + + fuzz_float(N, |x: f128| { + f_to_i!( + x, + f128, + Quad, + not(feature = "no-sys-f128-int-convert"), + u32, __fixunstfsi; + u64, __fixunstfdi; + u128, __fixunstfti; + i32, __fixtfsi; + i64, __fixtfdi; + i128, __fixtfti; + ); + }); + } +} + +macro_rules! f_to_f { + ( + $mod:ident, + $( + $from_ty:ty => $to_ty:ty, + $from_ap_ty:ident => $to_ap_ty:ident, + $fn:ident, $sys_available:meta + );+; + ) => {$( + #[test] + fn $fn() { + use compiler_builtins::float::{$mod::$fn, Float}; + use rustc_apfloat::ieee::{$from_ap_ty, $to_ap_ty}; + + fuzz_float(N, |x: $from_ty| { + let tmp0: $to_ty = apfloat_fallback!( + $from_ty, + $from_ap_ty, + $sys_available, + |x: $from_ty| x as $to_ty; + |x: $from_ty| { + let from_apf = FloatTy::from_bits(x.to_bits().into()); + // Get `value` directly to ignore INVALID_OP + let to_apf: $to_ap_ty = from_apf.convert(&mut false).value; + <$to_ty>::from_bits(to_apf.to_bits().try_into().unwrap()) + }, + x + ); + let tmp1: $to_ty = $fn(x); + + if !Float::eq_repr(tmp0, tmp1) { + panic!( + "{}({:?}): std: {:?}, builtins: {:?}", + stringify!($fn), + x, + tmp0, + tmp1 + ); + } + }) + } + )+}; +} + +mod extend { + use super::*; + + f_to_f! { + extend, + f32 => f64, Single => Double, __extendsfdf2, all(); + } + + #[cfg(all(f16_enabled, f128_enabled))] + #[cfg(not(any( + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "loongarch64" + )))] + f_to_f! 
{ + extend, + f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16"); + f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16"); + f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert"); + f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert"); + f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128"); + f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128"); + } + + #[cfg(f128_enabled)] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + f_to_f! { + extend, + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + f32 => f128, Single => Quad, __extendsfkf2, not(feature = "no-sys-f128"); + f64 => f128, Double => Quad, __extenddfkf2, not(feature = "no-sys-f128"); + } +} + +mod trunc { + use super::*; + + f_to_f! { + trunc, + f64 => f32, Double => Single, __truncdfsf2, all(); + } + + #[cfg(all(f16_enabled, f128_enabled))] + #[cfg(not(any( + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "loongarch64" + )))] + f_to_f! { + trunc, + f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16"); + f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16"); + f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert"); + f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert"); + f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128"); + f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128"); + } + + #[cfg(f128_enabled)] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + f_to_f! { + trunc, + // FIXME(#655): `f16` tests disabled until we can bootstrap symbols + f128 => f32, Quad => Single, __trunckfsf2, not(feature = "no-sys-f128"); + f128 => f64, Quad => Double, __trunckfdf2, not(feature = "no-sys-f128"); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/div_rem.rs b/library/compiler-builtins/builtins-test/tests/div_rem.rs new file mode 100644 index 00000000000..6c0280a3286 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/div_rem.rs @@ -0,0 +1,165 @@ +#![feature(f128)] +#![allow(unused_macros)] + +use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4}; +use compiler_builtins::int::udiv::{__udivmoddi4, __udivmodsi4, __udivmodti4, u128_divide_sparc}; + +use builtins_test::*; + +// Division algorithms have by far the nastiest and largest number of edge cases, and experience shows +// that sometimes 100_000 iterations of the random fuzzer is needed. + +/// Creates intensive test functions for division functions of a certain size +macro_rules! 
test {
+ (
+ $n:expr, // the number of bits in a $iX or $uX
+ $uX:ident, // unsigned integer type for the division
+ $iX:ident, // signed version of $uX
+ $test_name:ident, // name of the test function
+ $unsigned_name:ident, // unsigned division function
+ $signed_name:ident // signed division function
+ ) => {
+ #[test]
+ fn $test_name() {
+ fuzz_2(N, |lhs, rhs| {
+ if rhs == 0 {
+ return;
+ }
+
+ let mut rem: $uX = 0;
+ let quo: $uX = $unsigned_name(lhs, rhs, Some(&mut rem));
+ if rhs <= rem || (lhs != rhs.wrapping_mul(quo).wrapping_add(rem)) {
+ panic!(
+ "unsigned division function failed with lhs:{} rhs:{} \
+ std:({}, {}) builtins:({}, {})",
+ lhs,
+ rhs,
+ lhs.wrapping_div(rhs),
+ lhs.wrapping_rem(rhs),
+ quo,
+ rem
+ );
+ }
+
+ // test the signed division function also
+ let lhs = lhs as $iX;
+ let rhs = rhs as $iX;
+ let mut rem: $iX = 0;
+ let quo: $iX = $signed_name(lhs, rhs, &mut rem);
+ // We cannot just test that
+ // `lhs == rhs.wrapping_mul(quo).wrapping_add(rem)`, but also
+ // need to make sure the remainder isn't larger than the divisor
+ // and has the correct sign.
+ let incorrect_rem = if rem == 0 {
+ false
+ } else if rhs == $iX::MIN {
+ // `rhs.wrapping_abs()` would overflow, so handle this case
+ // separately.
+ (lhs.is_negative() != rem.is_negative()) || (rem == $iX::MIN)
+ } else {
+ (lhs.is_negative() != rem.is_negative())
+ || (rhs.wrapping_abs() <= rem.wrapping_abs())
+ };
+ if incorrect_rem || lhs != rhs.wrapping_mul(quo).wrapping_add(rem) {
+ panic!(
+ "signed division function failed with lhs:{} rhs:{} \
+ std:({}, {}) builtins:({}, {})",
+ lhs,
+ rhs,
+ lhs.wrapping_div(rhs),
+ lhs.wrapping_rem(rhs),
+ quo,
+ rem
+ );
+ }
+ });
+ }
+ };
+}
+
+test!(32, u32, i32, div_rem_si4, __udivmodsi4, __divmodsi4);
+test!(64, u64, i64, div_rem_di4, __udivmoddi4, __divmoddi4);
+test!(128, u128, i128, div_rem_ti4, __udivmodti4, __divmodti4);
+
+#[test]
+fn divide_sparc() {
+ fuzz_2(N, |lhs, rhs| {
+ if rhs == 0 {
+ return;
+ }
+
+ let mut rem: u128 = 0;
+ let quo: u128 = u128_divide_sparc(lhs, rhs, &mut rem);
+ if rhs <= rem || (lhs != rhs.wrapping_mul(quo).wrapping_add(rem)) {
+ panic!(
+ "u128_divide_sparc({}, {}): \
+ std:({}, {}), builtins:({}, {})",
+ lhs,
+ rhs,
+ lhs.wrapping_div(rhs),
+ lhs.wrapping_rem(rhs),
+ quo,
+ rem
+ );
+ }
+ });
+}
+
+macro_rules! float {
+ ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => {
+ $(
+ #[test]
+ fn $fn() {
+ use compiler_builtins::float::{div::$fn, Float};
+ use core::ops::Div;
+
+ fuzz_float_2(N, |x: $f, y: $f| {
+ let quo0: $f = apfloat_fallback!($f, $apfloat_ty, $sys_available, Div::div, x, y);
+ let quo1: $f = $fn(x, y);
+
+ // ARM SIMD instructions always flush subnormals to zero
+ if cfg!(target_arch = "arm") &&
+ ((Float::is_subnormal(quo0)) || Float::is_subnormal(quo1)) {
+ return;
+ }
+
+ if !Float::eq_repr(quo0, quo1) {
+ panic!(
+ "{}({:?}, {:?}): std: {:?}, builtins: {:?}",
+ stringify!($fn),
+ x,
+ y,
+ quo0,
+ quo1
+ );
+ }
+ });
+ }
+ )*
+ };
+}
+
+#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
+mod float_div {
+ use super::*;
+
+ float! {
+ f32, __divsf3, Single, all();
+ f64, __divdf3, Double, all();
+ }
+
+ #[cfg(not(feature = "no-f16-f128"))]
+ #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
+ float! {
+ f128, __divtf3, Quad,
+ // FIXME(llvm): there is a bug in LLVM rt.
+ // See <https://github.com/llvm/llvm-project/issues/91840>.
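+ // The `aarch64`/`linux` clause below therefore skips the comparison against
+ // the system `__divtf3` on that target and checks against the apfloat
+ // fallback instead.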
+ not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux"))); + } + + #[cfg(not(feature = "no-f16-f128"))] + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + float! { + f128, __divkf3, Quad, not(feature = "no-sys-f128"); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/float_pow.rs b/library/compiler-builtins/builtins-test/tests/float_pow.rs new file mode 100644 index 00000000000..8209543e666 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/float_pow.rs @@ -0,0 +1,72 @@ +#![allow(unused_macros)] +#![cfg_attr(f128_enabled, feature(f128))] +#![cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] + +use builtins_test::*; + +// This is approximate because of issues related to +// https://github.com/rust-lang/rust/issues/73920. +// TODO how do we resolve this indeterminacy? +macro_rules! pow { + ($($f:ty, $tolerance:expr, $fn:ident, $sys_available:meta);*;) => { + $( + #[test] + // FIXME(apfloat): We skip tests if system symbols aren't available rather + // than providing a fallback, since `rustc_apfloat` does not provide `pow`. + #[cfg($sys_available)] + fn $fn() { + use compiler_builtins::float::pow::$fn; + use compiler_builtins::float::Float; + fuzz_float_2(N, |x: $f, y: $f| { + if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) { + let n = y.to_bits() & !<$f as Float>::SIG_MASK; + let n = (n as <$f as Float>::SignedInt) >> <$f as Float>::SIG_BITS; + let n = n as i32; + let tmp0: $f = x.powi(n); + let tmp1: $f = $fn(x, n); + let (a, b) = if tmp0 < tmp1 { + (tmp0, tmp1) + } else { + (tmp1, tmp0) + }; + + let good = if a == b { + // handles infinity equality + true + } else if a < $tolerance { + b < $tolerance + } else { + let quo = b / a; + (quo < (1. + $tolerance)) && (quo > (1. - $tolerance)) + }; + + assert!( + good, + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), x, n, tmp0, tmp1 + ); + } + }); + } + )* + }; +} + +pow! { + f32, 1e-4, __powisf2, all(); + f64, 1e-12, __powidf2, all(); +} + +#[cfg(f128_enabled)] +// FIXME(f16_f128): MSVC cannot build these until `__divtf3` is available in nightly. +#[cfg(not(target_env = "msvc"))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +pow! { + f128, 1e-36, __powitf2, not(feature = "no-sys-f128"); +} + +#[cfg(f128_enabled)] +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +pow! { + f128, 1e-36, __powikf2, not(feature = "no-sys-f128"); +} diff --git a/library/compiler-builtins/builtins-test/tests/lse.rs b/library/compiler-builtins/builtins-test/tests/lse.rs new file mode 100644 index 00000000000..53167d98fc0 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/lse.rs @@ -0,0 +1,97 @@ +#![feature(decl_macro)] // so we can use pub(super) +#![cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm")))] + +/// Translate a byte size to a Rust type. 
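+/// For example, `int_ty!(4)` expands to `i32`, which lets the test macros below
+/// map the byte sizes fed in by `foreach_cas!` and friends to an operand type.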
+macro int_ty { + (1) => { i8 }, + (2) => { i16 }, + (4) => { i32 }, + (8) => { i64 }, + (16) => { i128 } +} + +mod cas { + pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { + #[test] + fn $name() { + builtins_test::fuzz_2(10000, |expected: super::int_ty!($bytes), new| { + let mut target = expected.wrapping_add(10); + assert_eq!( + unsafe { + compiler_builtins::aarch64_linux::$name::$name(expected, new, &mut target) + }, + expected.wrapping_add(10), + "return value should always be the previous value", + ); + assert_eq!( + target, + expected.wrapping_add(10), + "shouldn't have changed target" + ); + + target = expected; + assert_eq!( + unsafe { + compiler_builtins::aarch64_linux::$name::$name(expected, new, &mut target) + }, + expected + ); + assert_eq!(target, new, "should have updated target"); + }); + } + } +} + +macro test_cas16($_ordering:ident, $name:ident) { + cas::test!($_ordering, 16, $name); +} + +mod swap { + pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) { + #[test] + fn $name() { + builtins_test::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| { + let orig_right = right; + assert_eq!( + unsafe { compiler_builtins::aarch64_linux::$name::$name(left, &mut right) }, + orig_right + ); + assert_eq!(left, right); + }); + } + } +} + +macro_rules! test_op { + ($mod:ident, $( $op:tt )* ) => { + mod $mod { + pub(super) macro test { + ($_ordering:ident, $bytes:tt, $name:ident) => { + #[test] + fn $name() { + builtins_test::fuzz_2(10000, |old, val| { + let mut target = old; + let op: fn(super::int_ty!($bytes), super::int_ty!($bytes)) -> _ = $($op)*; + let expected = op(old, val); + assert_eq!(old, unsafe { compiler_builtins::aarch64_linux::$name::$name(val, &mut target) }, "{} should return original value", stringify!($name)); + assert_eq!(expected, target, "{} should store to target", stringify!($name)); + }); + } + } + } + } + }; +} + +test_op!(add, |left, right| left.wrapping_add(right)); +test_op!(clr, |left, right| left & !right); +test_op!(xor, std::ops::BitXor::bitxor); +test_op!(or, std::ops::BitOr::bitor); + +compiler_builtins::foreach_cas!(cas::test); +compiler_builtins::foreach_cas16!(test_cas16); +compiler_builtins::foreach_swp!(swap::test); +compiler_builtins::foreach_ldadd!(add::test); +compiler_builtins::foreach_ldclr!(clr::test); +compiler_builtins::foreach_ldeor!(xor::test); +compiler_builtins::foreach_ldset!(or::test); diff --git a/library/compiler-builtins/builtins-test/tests/mem.rs b/library/compiler-builtins/builtins-test/tests/mem.rs new file mode 100644 index 00000000000..d838ef159a0 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/mem.rs @@ -0,0 +1,286 @@ +extern crate compiler_builtins; +use compiler_builtins::mem::{memcmp, memcpy, memmove, memset}; + +const WORD_SIZE: usize = core::mem::size_of::<usize>(); + +#[test] +fn memcpy_3() { + let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(9); + let dst = arr.as_mut_ptr().offset(1); + assert_eq!(memcpy(dst, src, 3), dst); + assert_eq!(arr, [0, 9, 10, 11, 4, 5, 6, 7, 8, 9, 10, 11]); + } + arr = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(1); + let dst = arr.as_mut_ptr().offset(9); + assert_eq!(memcpy(dst, src, 3), dst); + assert_eq!(arr, [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3]); + } +} + +#[test] +fn memcpy_10() { + let arr: [u8; 18] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; + let mut dst: [u8; 12] = [0; 12]; + unsafe { + let src = 
arr.as_ptr().offset(1); + assert_eq!(memcpy(dst.as_mut_ptr(), src, 10), dst.as_mut_ptr()); + assert_eq!(dst, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0]); + } + unsafe { + let src = arr.as_ptr().offset(8); + assert_eq!(memcpy(dst.as_mut_ptr(), src, 10), dst.as_mut_ptr()); + assert_eq!(dst, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0, 0]); + } +} + +#[test] +fn memcpy_big() { + // Make the arrays cross 3 pages + const SIZE: usize = 8193; + let src: [u8; SIZE] = [22; SIZE]; + struct Dst { + start: usize, + buf: [u8; SIZE], + end: usize, + } + + let mut dst = Dst { + start: 0, + buf: [0; SIZE], + end: 0, + }; + unsafe { + assert_eq!( + memcpy(dst.buf.as_mut_ptr(), src.as_ptr(), SIZE), + dst.buf.as_mut_ptr() + ); + assert_eq!(dst.start, 0); + assert_eq!(dst.buf, [22; SIZE]); + assert_eq!(dst.end, 0); + } +} + +#[test] +fn memmove_forward() { + let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(6); + let dst = arr.as_mut_ptr().offset(3); + assert_eq!(memmove(dst, src, 5), dst); + assert_eq!(arr, [0, 1, 2, 6, 7, 8, 9, 10, 8, 9, 10, 11]); + } +} + +#[test] +fn memmove_backward() { + let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; + unsafe { + let src = arr.as_ptr().offset(3); + let dst = arr.as_mut_ptr().offset(6); + assert_eq!(memmove(dst, src, 5), dst); + assert_eq!(arr, [0, 1, 2, 3, 4, 5, 3, 4, 5, 6, 7, 11]); + } +} + +#[test] +fn memset_zero() { + let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + unsafe { + let ptr = arr.as_mut_ptr().offset(5); + assert_eq!(memset(ptr, 0, 2), ptr); + assert_eq!(arr, [0, 1, 2, 3, 4, 0, 0, 7]); + + // Only the LSB matters for a memset + assert_eq!(memset(arr.as_mut_ptr(), 0x2000, 8), arr.as_mut_ptr()); + assert_eq!(arr, [0, 0, 0, 0, 0, 0, 0, 0]); + } +} + +#[test] +fn memset_nonzero() { + let mut arr: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + unsafe { + let ptr = arr.as_mut_ptr().offset(2); + assert_eq!(memset(ptr, 22, 3), ptr); + assert_eq!(arr, [0, 1, 22, 22, 22, 5, 6, 7]); + + // Only the LSB matters for a memset + assert_eq!(memset(arr.as_mut_ptr(), 0x2009, 8), arr.as_mut_ptr()); + assert_eq!(arr, [9, 9, 9, 9, 9, 9, 9, 9]); + } +} + +#[test] +fn memcmp_eq() { + let arr1 @ arr2 = gen_arr::<256>(); + for i in 0..256 { + unsafe { + assert_eq!(memcmp(arr1.0.as_ptr(), arr2.0.as_ptr(), i), 0); + assert_eq!(memcmp(arr2.0.as_ptr(), arr1.0.as_ptr(), i), 0); + } + } +} + +#[test] +fn memcmp_ne() { + let arr1 @ arr2 = gen_arr::<256>(); + // Reduce iteration count in Miri as it is too slow otherwise. 
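+ // Flip a single byte at index `i`, then check that `memcmp` over every
+ // prefix long enough to contain the difference agrees in sign with the
+ // scalar comparison of the differing bytes.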
+ let limit = if cfg!(miri) { 64 } else { 256 };
+ for i in 0..limit {
+ let mut diff_arr = arr1;
+ diff_arr.0[i] = 127;
+ let expect = diff_arr.0[i].cmp(&arr2.0[i]);
+ for k in i + 1..limit {
+ let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) };
+ assert_eq!(expect, result.cmp(&0));
+ }
+ }
+}
+
+#[derive(Clone, Copy)]
+struct AlignedStorage<const N: usize>([u8; N], [usize; 0]);
+
+fn gen_arr<const N: usize>() -> AlignedStorage<N> {
+ let mut ret = AlignedStorage::<N>([0; N], []);
+ for i in 0..N {
+ ret.0[i] = i as u8;
+ }
+ ret
+}
+
+#[test]
+fn memmove_forward_misaligned_nonaligned_start() {
+ let mut arr = gen_arr::<32>();
+ let mut reference = arr;
+ unsafe {
+ let src = arr.0.as_ptr().offset(6);
+ let dst = arr.0.as_mut_ptr().offset(3);
+ assert_eq!(memmove(dst, src, 17), dst);
+ reference.0.copy_within(6..6 + 17, 3);
+ assert_eq!(arr.0, reference.0);
+ }
+}
+
+#[test]
+fn memmove_forward_misaligned_aligned_start() {
+ let mut arr = gen_arr::<32>();
+ let mut reference = arr;
+ unsafe {
+ let src = arr.0.as_ptr().offset(6);
+ let dst = arr.0.as_mut_ptr().add(0);
+ assert_eq!(memmove(dst, src, 17), dst);
+ reference.0.copy_within(6..6 + 17, 0);
+ assert_eq!(arr.0, reference.0);
+ }
+}
+
+#[test]
+fn memmove_forward_aligned() {
+ let mut arr = gen_arr::<32>();
+ let mut reference = arr;
+ unsafe {
+ let src = arr.0.as_ptr().add(3 + WORD_SIZE);
+ let dst = arr.0.as_mut_ptr().add(3);
+ assert_eq!(memmove(dst, src, 17), dst);
+ reference
+ .0
+ .copy_within(3 + WORD_SIZE..3 + WORD_SIZE + 17, 3);
+ assert_eq!(arr.0, reference.0);
+ }
+}
+
+#[test]
+fn memmove_backward_misaligned_nonaligned_start() {
+ let mut arr = gen_arr::<32>();
+ let mut reference = arr;
+ unsafe {
+ let src = arr.0.as_ptr().offset(3);
+ let dst = arr.0.as_mut_ptr().offset(6);
+ assert_eq!(memmove(dst, src, 17), dst);
+ reference.0.copy_within(3..3 + 17, 6);
+ assert_eq!(arr.0, reference.0);
+ }
+}
+
+#[test]
+fn memmove_backward_misaligned_aligned_start() {
+ let mut arr = gen_arr::<32>();
+ let mut reference = arr;
+ unsafe {
+ let src = arr.0.as_ptr().offset(3);
+ let dst = arr.0.as_mut_ptr().add(WORD_SIZE);
+ assert_eq!(memmove(dst, src, 17), dst);
+ reference.0.copy_within(3..3 + 17, WORD_SIZE);
+ assert_eq!(arr.0, reference.0);
+ }
+}
+
+#[test]
+fn memmove_backward_aligned() {
+ let mut arr = gen_arr::<32>();
+ let mut reference = arr;
+ unsafe {
+ let src = arr.0.as_ptr().add(3);
+ let dst = arr.0.as_mut_ptr().add(3 + WORD_SIZE);
+ assert_eq!(memmove(dst, src, 17), dst);
+ reference.0.copy_within(3..3 + 17, 3 + WORD_SIZE);
+ assert_eq!(arr.0, reference.0);
+ }
+}
+
+#[test]
+fn memmove_misaligned_bounds() {
+ // The above tests have the downside that the addresses surrounding the range-to-copy are all
+ // still in-bounds, so Miri would not actually complain about OOB accesses. So we also test with
+ // an array that has just the right size. We test a few times to avoid it being accidentally
+ // aligned.
+ for _ in 0..8 {
+ let mut arr1 = [0u8; 17];
+ let mut arr2 = [0u8; 17];
+ unsafe {
+ // Copy both ways so we hit both the forward and backward cases.
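+ // (Whether a given call takes the forward or the backward path depends on
+ // how the two arrays happen to be laid out in memory, so do both.)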
+ memmove(arr1.as_mut_ptr(), arr2.as_mut_ptr(), 17);
+ memmove(arr2.as_mut_ptr(), arr1.as_mut_ptr(), 17);
+ }
+ }
+}
+
+#[test]
+fn memset_backward_misaligned_nonaligned_start() {
+ let mut arr = gen_arr::<32>();
+ let mut reference = arr;
+ unsafe {
+ let ptr = arr.0.as_mut_ptr().offset(6);
+ assert_eq!(memset(ptr, 0xCC, 17), ptr);
+ core::ptr::write_bytes(reference.0.as_mut_ptr().add(6), 0xCC, 17);
+ assert_eq!(arr.0, reference.0);
+ }
+}
+
+#[test]
+fn memset_backward_misaligned_aligned_start() {
+ let mut arr = gen_arr::<32>();
+ let mut reference = arr;
+ unsafe {
+ let ptr = arr.0.as_mut_ptr().add(WORD_SIZE);
+ assert_eq!(memset(ptr, 0xCC, 17), ptr);
+ core::ptr::write_bytes(reference.0.as_mut_ptr().add(WORD_SIZE), 0xCC, 17);
+ assert_eq!(arr.0, reference.0);
+ }
+}
+
+#[test]
+fn memset_backward_aligned() {
+ let mut arr = gen_arr::<32>();
+ let mut reference = arr;
+ unsafe {
+ let ptr = arr.0.as_mut_ptr().add(3 + WORD_SIZE);
+ assert_eq!(memset(ptr, 0xCC, 17), ptr);
+ core::ptr::write_bytes(reference.0.as_mut_ptr().add(3 + WORD_SIZE), 0xCC, 17);
+ assert_eq!(arr.0, reference.0);
+ }
+}
diff --git a/library/compiler-builtins/builtins-test/tests/misc.rs b/library/compiler-builtins/builtins-test/tests/misc.rs
new file mode 100644
index 00000000000..b8c75c02653
--- /dev/null
+++ b/library/compiler-builtins/builtins-test/tests/misc.rs
@@ -0,0 +1,208 @@
+// makes configuration easier
+#![allow(unused_macros)]
+
+use builtins_test::*;
+
+/// Make sure that the edge case tester and randomized tester don't break, and list examples of
+/// fuzz values for documentation purposes.
+#[test]
+fn fuzz_values() {
+ const VALS: [u16; 47] = [
+ 0b0, // edge cases
+ 0b1111111111111111,
+ 0b1111111111111110,
+ 0b1111111111111100,
+ 0b1111111110000000,
+ 0b1111111100000000,
+ 0b1110000000000000,
+ 0b1100000000000000,
+ 0b1000000000000000,
+ 0b111111111111111,
+ 0b111111111111110,
+ 0b111111111111100,
+ 0b111111110000000,
+ 0b111111100000000,
+ 0b110000000000000,
+ 0b100000000000000,
+ 0b11111111111111,
+ 0b11111111111110,
+ 0b11111111111100,
+ 0b11111110000000,
+ 0b11111100000000,
+ 0b10000000000000,
+ 0b111111111,
+ 0b111111110,
+ 0b111111100,
+ 0b110000000,
+ 0b100000000,
+ 0b11111111,
+ 0b11111110,
+ 0b11111100,
+ 0b10000000,
+ 0b111,
+ 0b110,
+ 0b100,
+ 0b11,
+ 0b10,
+ 0b1,
+ 0b1010110100000, // beginning of random fuzzing
+ 0b1100011001011010,
+ 0b1001100101001111,
+ 0b1101010100011010,
+ 0b100010001,
+ 0b1000000000000000,
+ 0b1100000000000101,
+ 0b1100111101010101,
+ 0b1100010111111111,
+ 0b1111110101111111,
+ ];
+ let mut i = 0;
+ fuzz(10, |x: u16| {
+ assert_eq!(x, VALS[i]);
+ i += 1;
+ });
+}
+
+#[test]
+fn leading_zeros() {
+ use compiler_builtins::int::leading_zeros::{leading_zeros_default, leading_zeros_riscv};
+ {
+ use compiler_builtins::int::leading_zeros::__clzsi2;
+ fuzz(N, |x: u32| {
+ if x == 0 {
+ return; // undefined value for an intrinsic
+ }
+ let lz = x.leading_zeros() as usize;
+ let lz0 = __clzsi2(x);
+ let lz1 = leading_zeros_default(x);
+ let lz2 = leading_zeros_riscv(x);
+ if lz0 != lz {
+ panic!("__clzsi2({}): std: {}, builtins: {}", x, lz, lz0);
+ }
+ if lz1 != lz {
+ panic!(
+ "leading_zeros_default({}): std: {}, builtins: {}",
+ x, lz, lz1
+ );
+ }
+ if lz2 != lz {
+ panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2);
+ }
+ });
+ }
+
+ {
+ use compiler_builtins::int::leading_zeros::__clzdi2;
+ fuzz(N, |x: u64| {
+ if x == 0 {
+ return; // undefined value for an intrinsic
+ }
+ let lz = x.leading_zeros() as usize;
+ let lz0 = __clzdi2(x);
+ let lz1 
= leading_zeros_default(x); + let lz2 = leading_zeros_riscv(x); + if lz0 != lz { + panic!("__clzdi2({}): std: {}, builtins: {}", x, lz, lz0); + } + if lz1 != lz { + panic!( + "leading_zeros_default({}): std: {}, builtins: {}", + x, lz, lz1 + ); + } + if lz2 != lz { + panic!("leading_zeros_riscv({}): std: {}, builtins: {}", x, lz, lz2); + } + }); + } + + { + use compiler_builtins::int::leading_zeros::__clzti2; + fuzz(N, |x: u128| { + if x == 0 { + return; // undefined value for an intrinsic + } + let lz = x.leading_zeros() as usize; + let lz0 = __clzti2(x); + if lz0 != lz { + panic!("__clzti2({}): std: {}, builtins: {}", x, lz, lz0); + } + }); + } +} + +#[test] +fn trailing_zeros() { + use compiler_builtins::int::trailing_zeros::{__ctzdi2, __ctzsi2, __ctzti2, trailing_zeros}; + fuzz(N, |x: u32| { + if x == 0 { + return; // undefined value for an intrinsic + } + let tz = x.trailing_zeros() as usize; + let tz0 = __ctzsi2(x); + let tz1 = trailing_zeros(x); + if tz0 != tz { + panic!("__ctzsi2({}): std: {}, builtins: {}", x, tz, tz0); + } + if tz1 != tz { + panic!("trailing_zeros({}): std: {}, builtins: {}", x, tz, tz1); + } + }); + fuzz(N, |x: u64| { + if x == 0 { + return; // undefined value for an intrinsic + } + let tz = x.trailing_zeros() as usize; + let tz0 = __ctzdi2(x); + let tz1 = trailing_zeros(x); + if tz0 != tz { + panic!("__ctzdi2({}): std: {}, builtins: {}", x, tz, tz0); + } + if tz1 != tz { + panic!("trailing_zeros({}): std: {}, builtins: {}", x, tz, tz1); + } + }); + fuzz(N, |x: u128| { + if x == 0 { + return; // undefined value for an intrinsic + } + let tz = x.trailing_zeros() as usize; + let tz0 = __ctzti2(x); + if tz0 != tz { + panic!("__ctzti2({}): std: {}, builtins: {}", x, tz, tz0); + } + }); +} + +#[test] +fn bswap() { + use compiler_builtins::int::bswap::{__bswapdi2, __bswapsi2}; + fuzz(N, |x: u32| { + assert_eq!(x.swap_bytes(), __bswapsi2(x)); + }); + fuzz(N, |x: u64| { + assert_eq!(x.swap_bytes(), __bswapdi2(x)); + }); + + assert_eq!(__bswapsi2(0x12345678u32), 0x78563412u32); + assert_eq!(__bswapsi2(0x00000001u32), 0x01000000u32); + assert_eq!(__bswapdi2(0x123456789ABCDEF0u64), 0xF0DEBC9A78563412u64); + assert_eq!(__bswapdi2(0x0200000001000000u64), 0x0000000100000002u64); + + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + use compiler_builtins::int::bswap::__bswapti2; + fuzz(N, |x: u128| { + assert_eq!(x.swap_bytes(), __bswapti2(x)); + }); + + assert_eq!( + __bswapti2(0x123456789ABCDEF013579BDF02468ACEu128), + 0xCE8A4602DF9B5713F0DEBC9A78563412u128 + ); + assert_eq!( + __bswapti2(0x04000000030000000200000001000000u128), + 0x00000001000000020000000300000004u128 + ); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/mul.rs b/library/compiler-builtins/builtins-test/tests/mul.rs new file mode 100644 index 00000000000..198cacb3489 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/mul.rs @@ -0,0 +1,156 @@ +#![allow(unused_macros)] +#![cfg_attr(f128_enabled, feature(f128))] + +use builtins_test::*; + +mod int_mul { + use super::*; + + macro_rules! mul { + ($($i:ty, $fn:ident);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::int::mul::$fn; + + fuzz_2(N, |x: $i, y: $i| { + let mul0 = x.wrapping_mul(y); + let mul1: $i = $fn(x, y); + if mul0 != mul1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn), x, y, mul0, mul1 + ); + } + }); + + } + )* + }; + } + + mul! { + u64, __muldi3; + i128, __multi3; + } +} + +mod int_overflowing_mul { + use super::*; + + macro_rules! 
overflowing_mul { + ($($i:ty, $fn:ident);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::int::mul::$fn; + + fuzz_2(N, |x: $i, y: $i| { + let (mul0, o0) = x.overflowing_mul(y); + let mut o1 = 0i32; + let mul1: $i = $fn(x, y, &mut o1); + let o1 = o1 != 0; + if mul0 != mul1 || o0 != o1 { + panic!( + "{}({}, {}): std: ({}, {}), builtins: ({}, {})", + stringify!($fn), x, y, mul0, o0, mul1, o1 + ); + } + }); + } + )* + }; + } + + overflowing_mul! { + i32, __mulosi4; + i64, __mulodi4; + i128, __muloti4; + } + + #[test] + fn overflowing_mul_u128() { + use compiler_builtins::int::mul::{__rust_i128_mulo, __rust_u128_mulo}; + + fuzz_2(N, |x: u128, y: u128| { + let mut o1 = 0; + let (mul0, o0) = x.overflowing_mul(y); + let mul1 = __rust_u128_mulo(x, y, &mut o1); + if mul0 != mul1 || i32::from(o0) != o1 { + panic!( + "__rust_u128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", + x, y, mul0, o0, mul1, o1 + ); + } + let x = x as i128; + let y = y as i128; + let (mul0, o0) = x.overflowing_mul(y); + let mul1 = __rust_i128_mulo(x, y, &mut o1); + if mul0 != mul1 || i32::from(o0) != o1 { + panic!( + "__rust_i128_mulo({}, {}): std: ({}, {}), builtins: ({}, {})", + x, y, mul0, o0, mul1, o1 + ); + } + }); + } +} + +macro_rules! float_mul { + ($($f:ty, $fn:ident, $apfloat_ty:ident, $sys_available:meta);*;) => { + $( + #[test] + fn $fn() { + use compiler_builtins::float::{mul::$fn, Float}; + use core::ops::Mul; + + fuzz_float_2(N, |x: $f, y: $f| { + let mul0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Mul::mul, x, y); + let mul1: $f = $fn(x, y); + if !Float::eq_repr(mul0, mul1) { + panic!( + "{}({:?}, {:?}): std: {:?}, builtins: {:?}", + stringify!($fn), x, y, mul0, mul1 + ); + } + }); + } + )* + }; +} + +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +mod float_mul { + use super::*; + + // FIXME(#616): Stop ignoring arches that don't have native support once fix for builtins is in + // nightly. + float_mul! { + f32, __mulsf3, Single, not(target_arch = "arm"); + f64, __muldf3, Double, not(target_arch = "arm"); + } +} + +#[cfg(f128_enabled)] +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))] +#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] +mod float_mul_f128 { + use super::*; + + float_mul! { + f128, __multf3, Quad, + // FIXME(llvm): there is a bug in LLVM rt. + // See <https://github.com/llvm/llvm-project/issues/91840>. + not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux"))); + } +} + +#[cfg(f128_enabled)] +#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] +mod float_mul_f128_ppc { + use super::*; + + float_mul! { + f128, __mulkf3, Quad, not(feature = "no-sys-f128"); + } +} diff --git a/library/compiler-builtins/builtins-test/tests/shift.rs b/library/compiler-builtins/builtins-test/tests/shift.rs new file mode 100644 index 00000000000..0f2483855e5 --- /dev/null +++ b/library/compiler-builtins/builtins-test/tests/shift.rs @@ -0,0 +1,35 @@ +use builtins_test::*; + +macro_rules! shift { + ($($i:ty, $fn_std:ident, $fn_builtins:ident);*;) => { + $( + #[test] + fn $fn_builtins() { + use compiler_builtins::int::shift::$fn_builtins; + + fuzz_shift(|x: $i, s: u32| { + let tmp0: $i = x.$fn_std(s); + let tmp1: $i = $fn_builtins(x, s); + if tmp0 != tmp1 { + panic!( + "{}({}, {}): std: {}, builtins: {}", + stringify!($fn_builtins), x, s, tmp0, tmp1 + ); + } + }); + } + )* + }; +} + +shift! 
{
+ u32, wrapping_shl, __ashlsi3;
+ u64, wrapping_shl, __ashldi3;
+ u128, wrapping_shl, __ashlti3;
+ i32, wrapping_shr, __ashrsi3;
+ i64, wrapping_shr, __ashrdi3;
+ i128, wrapping_shr, __ashrti3;
+ u32, wrapping_shr, __lshrsi3;
+ u64, wrapping_shr, __lshrdi3;
+ u128, wrapping_shr, __lshrti3;
+}
