diff options
Diffstat (limited to 'src/test/codegen')
| -rw-r--r-- | src/test/codegen/swap-large-types.rs | 64 | ||||
| -rw-r--r-- | src/test/codegen/swap-simd-types.rs | 32 | ||||
| -rw-r--r-- | src/test/codegen/swap-small-types.rs | 44 |
3 files changed, 140 insertions, 0 deletions
diff --git a/src/test/codegen/swap-large-types.rs b/src/test/codegen/swap-large-types.rs new file mode 100644 index 00000000000..535d301a3d2 --- /dev/null +++ b/src/test/codegen/swap-large-types.rs @@ -0,0 +1,64 @@ +// compile-flags: -O +// only-x86_64 +// ignore-debug: the debug assertions get in the way + +#![crate_type = "lib"] + +use std::mem::swap; +use std::ptr::{read, copy_nonoverlapping, write}; + +type KeccakBuffer = [[u64; 5]; 5]; + +// A basic read+copy+write swap implementation ends up copying one of the values +// to stack for large types, which is completely unnecessary as the lack of +// overlap means we can just do whatever fits in registers at a time. + +// CHECK-LABEL: @swap_basic +#[no_mangle] +pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) { +// CHECK: alloca [5 x [5 x i64]] + + // SAFETY: exclusive references are always valid to read/write, + // are non-overlapping, and nothing here panics so it's drop-safe. + unsafe { + let z = read(x); + copy_nonoverlapping(y, x, 1); + write(y, z); + } +} + +// This test verifies that the library does something smarter, and thus +// doesn't need any scratch space on the stack. + +// CHECK-LABEL: @swap_std +#[no_mangle] +pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) { +// CHECK-NOT: alloca +// CHECK: load <{{[0-9]+}} x i64> +// CHECK: store <{{[0-9]+}} x i64> + swap(x, y) +} + +// CHECK-LABEL: @swap_slice +#[no_mangle] +pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) { +// CHECK-NOT: alloca +// CHECK: load <{{[0-9]+}} x i64> +// CHECK: store <{{[0-9]+}} x i64> + if x.len() == y.len() { + x.swap_with_slice(y); + } +} + +type OneKilobyteBuffer = [u8; 1024]; + +// CHECK-LABEL: @swap_1kb_slices +#[no_mangle] +pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) { +// CHECK-NOT: alloca +// CHECK: load <{{[0-9]+}} x i8> +// CHECK: store <{{[0-9]+}} x i8> + if x.len() == y.len() { + x.swap_with_slice(y); + } +} diff --git a/src/test/codegen/swap-simd-types.rs b/src/test/codegen/swap-simd-types.rs new file mode 100644 index 00000000000..c90b277eb44 --- /dev/null +++ b/src/test/codegen/swap-simd-types.rs @@ -0,0 +1,32 @@ +// compile-flags: -O -C target-feature=+avx +// only-x86_64 +// ignore-debug: the debug assertions get in the way + +#![crate_type = "lib"] + +use std::mem::swap; + +// SIMD types are highly-aligned already, so make sure the swap code leaves their +// types alone and doesn't pessimize them (such as by swapping them as `usize`s). +extern crate core; +use core::arch::x86_64::__m256; + +// CHECK-LABEL: @swap_single_m256 +#[no_mangle] +pub fn swap_single_m256(x: &mut __m256, y: &mut __m256) { +// CHECK-NOT: alloca +// CHECK: load <8 x float>{{.+}}align 32 +// CHECK: store <8 x float>{{.+}}align 32 + swap(x, y) +} + +// CHECK-LABEL: @swap_m256_slice +#[no_mangle] +pub fn swap_m256_slice(x: &mut [__m256], y: &mut [__m256]) { +// CHECK-NOT: alloca +// CHECK: load <8 x float>{{.+}}align 32 +// CHECK: store <8 x float>{{.+}}align 32 + if x.len() == y.len() { + x.swap_with_slice(y); + } +} diff --git a/src/test/codegen/swap-small-types.rs b/src/test/codegen/swap-small-types.rs index 6205e6a6559..2f375844cc7 100644 --- a/src/test/codegen/swap-small-types.rs +++ b/src/test/codegen/swap-small-types.rs @@ -16,3 +16,47 @@ pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) { // CHECK: store i48 swap(x, y) } + +// LLVM doesn't vectorize a loop over 3-byte elements, +// so we chunk it down to bytes and loop over those instead. +type RGB24 = [u8; 3]; + +// CHECK-LABEL: @swap_rgb24_slices +#[no_mangle] +pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) { +// CHECK-NOT: alloca +// CHECK: load <{{[0-9]+}} x i8> +// CHECK: store <{{[0-9]+}} x i8> + if x.len() == y.len() { + x.swap_with_slice(y); + } +} + +// This one has a power-of-two size, so we iterate over it directly +type RGBA32 = [u8; 4]; + +// CHECK-LABEL: @swap_rgba32_slices +#[no_mangle] +pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) { +// CHECK-NOT: alloca +// CHECK: load <{{[0-9]+}} x i32> +// CHECK: store <{{[0-9]+}} x i32> + if x.len() == y.len() { + x.swap_with_slice(y); + } +} + +// Strings have a non-power-of-two size, but have pointer alignment, +// so we swap usizes instead of dropping all the way down to bytes. +const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two()); + +// CHECK-LABEL: @swap_string_slices +#[no_mangle] +pub fn swap_string_slices(x: &mut [String], y: &mut [String]) { +// CHECK-NOT: alloca +// CHECK: load <{{[0-9]+}} x i64> +// CHECK: store <{{[0-9]+}} x i64> + if x.len() == y.len() { + x.swap_with_slice(y); + } +} |
