Diffstat (limited to 'tests/codegen-llvm/simd')

 tests/codegen-llvm/simd/aggregate-simd.rs        | 102
 tests/codegen-llvm/simd/extract-insert-dyn.rs    | 121
 tests/codegen-llvm/simd/packed-simd-alignment.rs |  44
 tests/codegen-llvm/simd/packed-simd.rs           |  56
 tests/codegen-llvm/simd/simd-wide-sum.rs         |  59
 tests/codegen-llvm/simd/simd_arith_offset.rs     |  22
 tests/codegen-llvm/simd/swap-simd-types.rs       |  40
 tests/codegen-llvm/simd/unpadded-simd.rs         |  19
 8 files changed, 463 insertions(+), 0 deletions(-)
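Several of these tests import a shared helper via #[path = "../../auxiliary/minisimd.rs"], which is not part of this diff. As a rough sketch of what the tests assume it provides (the real auxiliary file may differ in details), the two vector wrappers presumably look like:

    // Hypothetical sketch of the minisimd auxiliary these tests import;
    // the actual tests/auxiliary/minisimd.rs may define more items.
    // Requires the repr_simd nightly feature, enabled by the including crate.

    /// A "full" SIMD vector; rustc may give it extra alignment and padding.
    #[repr(simd)]
    #[derive(Copy, Clone)]
    pub struct Simd<T, const N: usize>(pub [T; N]);

    /// A "packed" SIMD vector: element alignment and no padding.
    #[repr(packed, simd)]
    #[derive(Copy, Clone)]
    pub struct PackedSimd<T, const N: usize>(pub [T; N]);

The construction style `Simd(x)` / `PackedSimd(x)` and the generic `<T, N>` parameters used throughout the diff below match this shape.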
diff --git a/tests/codegen-llvm/simd/aggregate-simd.rs b/tests/codegen-llvm/simd/aggregate-simd.rs
new file mode 100644
index 00000000000..57a301d634c
--- /dev/null
+++ b/tests/codegen-llvm/simd/aggregate-simd.rs
@@ -0,0 +1,102 @@
+//@ compile-flags: -C opt-level=3 -C no-prepopulate-passes
+//@ only-64bit
+
+#![feature(core_intrinsics, repr_simd)]
+#![no_std]
+#![crate_type = "lib"]
+
+#[path = "../../auxiliary/minisimd.rs"]
+mod minisimd;
+use core::intrinsics::simd::{simd_add, simd_extract};
+
+use minisimd::*;
+
+#[repr(transparent)]
+pub struct Transparent<T>(T);
+
+// These tests don't actually care about the add/extract, but they ensure the
+// aggregated temporaries are only used in potentially-SSA ways.
+
+#[no_mangle]
+pub fn simd_aggregate_pot(x: [u32; 4], y: [u32; 4]) -> u32 {
+    // CHECK-LABEL: simd_aggregate_pot
+    // CHECK: %a = load <4 x i32>, ptr %x, align 4
+    // CHECK: %b = load <4 x i32>, ptr %y, align 4
+    // CHECK: add <4 x i32> %a, %b
+
+    unsafe {
+        let a = Simd(x);
+        let b = Simd(y);
+        let c = simd_add(a, b);
+        simd_extract(c, 1)
+    }
+}
+
+#[no_mangle]
+pub fn simd_aggregate_npot(x: [u32; 7], y: [u32; 7]) -> u32 {
+    // CHECK-LABEL: simd_aggregate_npot
+    // CHECK: %a = load <7 x i32>, ptr %x, align 4
+    // CHECK: %b = load <7 x i32>, ptr %y, align 4
+    // CHECK: add <7 x i32> %a, %b
+
+    unsafe {
+        let a = Simd(x);
+        let b = Simd(y);
+        let c = simd_add(a, b);
+        simd_extract(c, 1)
+    }
+}
+
+#[no_mangle]
+pub fn packed_simd_aggregate_pot(x: [u32; 4], y: [u32; 4]) -> u32 {
+    // CHECK-LABEL: packed_simd_aggregate_pot
+    // CHECK: %a = load <4 x i32>, ptr %x, align 4
+    // CHECK: %b = load <4 x i32>, ptr %y, align 4
+    // CHECK: add <4 x i32> %a, %b
+
+    unsafe {
+        let a = PackedSimd(x);
+        let b = PackedSimd(y);
+        let c = simd_add(a, b);
+        simd_extract(c, 1)
+    }
+}
+
+#[no_mangle]
+pub fn packed_simd_aggregate_npot(x: [u32; 7], y: [u32; 7]) -> u32 {
+    // CHECK-LABEL: packed_simd_aggregate_npot
+    // CHECK: %b = alloca [28 x i8], align 4
+    // CHECK: %a = alloca [28 x i8], align 4
+    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %x, i64 28, i1 false)
+    // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %b, ptr align 4 %y, i64 28, i1 false)
+    // CHECK: %[[TEMPA:.+]] = load <7 x i32>, ptr %a, align 4
+    // CHECK: %[[TEMPB:.+]] = load <7 x i32>, ptr %b, align 4
+    // CHECK: add <7 x i32> %[[TEMPA]], %[[TEMPB]]
+
+    unsafe {
+        let a = PackedSimd(x);
+        let b = PackedSimd(y);
+        let c = simd_add(a, b);
+        simd_extract(c, 1)
+    }
+}
+
+#[no_mangle]
+pub fn transparent_simd_aggregate(x: [u32; 4]) -> u32 {
+    // The transparent wrapper can just use the same SSA value as its field.
+    // No extra processing or spilling needed.
+
+    // CHECK-LABEL: transparent_simd_aggregate
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = alloca [4 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: %a = load <4 x i32>, ptr %x, align 4
+    // CHECK: %[[TEMP:.+]] = extractelement <4 x i32> %a, i32 1
+    // CHECK: store i32 %[[TEMP]], ptr %[[RET]]
+
+    unsafe {
+        let a = Simd(x);
+        let b = Transparent(a);
+        simd_extract(b.0, 1)
+    }
+}
diff --git a/tests/codegen-llvm/simd/extract-insert-dyn.rs b/tests/codegen-llvm/simd/extract-insert-dyn.rs
new file mode 100644
index 00000000000..729f0145314
--- /dev/null
+++ b/tests/codegen-llvm/simd/extract-insert-dyn.rs
@@ -0,0 +1,121 @@
+//@ compile-flags: -C opt-level=3 -C no-prepopulate-passes
+
+#![feature(
+    core_intrinsics,
+    repr_simd,
+    arm_target_feature,
+    mips_target_feature,
+    s390x_target_feature
+)]
+#![no_std]
+#![crate_type = "lib"]
+#![allow(non_camel_case_types)]
+
+// Test that `core::intrinsics::simd::{simd_extract_dyn, simd_insert_dyn}`
+// lower to LLVM `extractelement` / `insertelement` operations.
+
+use core::intrinsics::simd::{simd_extract, simd_extract_dyn, simd_insert, simd_insert_dyn};
+
+#[repr(simd)]
+#[derive(Clone, Copy)]
+pub struct u32x16([u32; 16]);
+
+#[repr(simd)]
+#[derive(Clone, Copy)]
+pub struct i8x16([i8; 16]);
+
+// CHECK-LABEL: dyn_simd_extract
+// CHECK: extractelement <16 x i8> %x, i32 %idx
+#[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
+unsafe extern "C" fn dyn_simd_extract(x: i8x16, idx: u32) -> i8 {
+    simd_extract_dyn(x, idx)
+}
+
+// CHECK-LABEL: literal_dyn_simd_extract
+// CHECK: extractelement <16 x i8> %x, i32 7
+#[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
+unsafe extern "C" fn literal_dyn_simd_extract(x: i8x16) -> i8 {
+    simd_extract_dyn(x, 7)
+}
+
+// CHECK-LABEL: const_dyn_simd_extract
+// CHECK: extractelement <16 x i8> %x, i32 7
+#[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
+unsafe extern "C" fn const_dyn_simd_extract(x: i8x16) -> i8 {
+    simd_extract_dyn(x, const { 3 + 4 })
+}
+
+// CHECK-LABEL: const_simd_extract
+// CHECK: extractelement <16 x i8> %x, i32 7
+#[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
+unsafe extern "C" fn const_simd_extract(x: i8x16) -> i8 {
+    simd_extract(x, const { 3 + 4 })
+}
+
+// CHECK-LABEL: dyn_simd_insert
+// CHECK: insertelement <16 x i8> %x, i8 %e, i32 %idx
+#[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
+unsafe extern "C" fn dyn_simd_insert(x: i8x16, e: i8, idx: u32) -> i8x16 {
+    simd_insert_dyn(x, idx, e)
+}
+
+// CHECK-LABEL: literal_dyn_simd_insert
+// CHECK: insertelement <16 x i8> %x, i8 %e, i32 7
+#[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
+unsafe extern "C" fn literal_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 {
+    simd_insert_dyn(x, 7, e)
+}
+
+// CHECK-LABEL: const_dyn_simd_insert
+// CHECK: insertelement <16 x i8> %x, i8 %e, i32 7
+#[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
+unsafe extern "C" fn const_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 {
+    simd_insert_dyn(x, const { 3 + 4 }, e)
+}
+
+// CHECK-LABEL: const_simd_insert
+// CHECK: insertelement <16 x i8> %x, i8 %e, i32 7
+#[no_mangle]
+#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))]
+#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))]
+#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))]
+#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))]
+unsafe extern "C" fn const_simd_insert(x: i8x16, e: i8) -> i8x16 {
+    simd_insert(x, const { 3 + 4 }, e)
+}
diff --git a/tests/codegen-llvm/simd/packed-simd-alignment.rs b/tests/codegen-llvm/simd/packed-simd-alignment.rs
new file mode 100644
index 00000000000..53e88d8e5cf
--- /dev/null
+++ b/tests/codegen-llvm/simd/packed-simd-alignment.rs
@@ -0,0 +1,44 @@
+//@ compile-flags: -Cno-prepopulate-passes
+
+#![crate_type = "lib"]
+#![feature(repr_simd, core_intrinsics)]
+// Make sure that codegen emits correctly-aligned loads and stores for repr(packed, simd) types:
+// the alignment of a load should be no less than that of T, and no more than the size of the vector type.
+use std::intrinsics::simd as intrinsics;
+
+#[derive(Copy, Clone)]
+#[repr(packed, simd)]
+struct f32x3([f32; 3]);
+
+#[derive(Copy, Clone)]
+#[repr(packed, simd)]
+struct f32x4([f32; 4]);
+
+// CHECK-LABEL: load_f32x3
+#[no_mangle]
+pub fn load_f32x3(floats: &f32x3) -> f32x3 {
+    // FIXME: Is a memcpy really the best we can do?
+    // CHECK: @llvm.memcpy.{{.*}}ptr align 4 {{.*}}ptr align 4
+    *floats
+}
+
+// CHECK-LABEL: load_f32x4
+#[no_mangle]
+pub fn load_f32x4(floats: &f32x4) -> f32x4 {
+    // CHECK: load <4 x float>, ptr %{{[a-z0-9_]*}}, align {{4|8|16}}
+    *floats
+}
+
+// CHECK-LABEL: add_f32x3
+#[no_mangle]
+pub fn add_f32x3(x: f32x3, y: f32x3) -> f32x3 {
+    // CHECK: load <3 x float>, ptr %{{[a-z0-9_]*}}, align 4
+    unsafe { intrinsics::simd_add(x, y) }
+}
+
+// CHECK-LABEL: add_f32x4
+#[no_mangle]
+pub fn add_f32x4(x: f32x4, y: f32x4) -> f32x4 {
+    // CHECK: load <4 x float>, ptr %{{[a-z0-9_]*}}, align {{4|8|16}}
+    unsafe { intrinsics::simd_add(x, y) }
+}
diff --git a/tests/codegen-llvm/simd/packed-simd.rs b/tests/codegen-llvm/simd/packed-simd.rs
new file mode 100644
index 00000000000..70c03fcc955
--- /dev/null
+++ b/tests/codegen-llvm/simd/packed-simd.rs
@@ -0,0 +1,56 @@
+//@ revisions: opt3 noopt
+//@ only-x86_64
+//@[opt3] compile-flags: -Copt-level=3
+//@[noopt] compile-flags: -Cno-prepopulate-passes
+
+#![crate_type = "lib"]
+#![no_std]
+#![feature(repr_simd, core_intrinsics)]
+use core::intrinsics::simd as intrinsics;
+use core::{mem, ptr};
+
+#[path = "../../auxiliary/minisimd.rs"]
+mod minisimd;
+use minisimd::{PackedSimd, Simd as FullSimd};
+
+// Test codegen for not only "packed" but also "fully aligned" SIMD types, and conversion between
+// them. A repr(packed,simd) type with 3 elements can't exceed its element alignment, whereas the
+// same type as repr(simd) will instead have padding.
+
+// Non-power-of-two vectors have padding as repr(simd) and need to be expanded to full vectors.
+fn load<T, const N: usize>(v: PackedSimd<T, N>) -> FullSimd<T, N> {
+    unsafe {
+        let mut tmp = mem::MaybeUninit::<FullSimd<T, N>>::uninit();
+        ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
+        tmp.assume_init()
+    }
+}
+
+// CHECK-LABEL: square_packed_full
+// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
+// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
+#[no_mangle]
+pub fn square_packed_full(x: PackedSimd<f32, 3>) -> FullSimd<f32, 3> {
+    // CHECK-NEXT: start
+    // noopt: alloca [[RET_TYPE]], [[RET_ALIGN]]
+    // CHECK: load <3 x float>
+    let x = load(x);
+    // CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
+    // CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]]
+    // CHECK-NEXT: ret void
+    unsafe { intrinsics::simd_mul(x, x) }
+}
+
+// CHECK-LABEL: square_packed
+// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align 4]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
+// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
+#[no_mangle]
+pub fn square_packed(x: PackedSimd<f32, 3>) -> PackedSimd<f32, 3> {
+    // CHECK-NEXT: start
+    // CHECK-NEXT: load <3 x float>
+    // noopt-NEXT: load <3 x float>
+    // CHECK-NEXT: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
+    // CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]]
+    // CHECK-NEXT: ret void
+    unsafe { intrinsics::simd_mul(x, x) }
+}
diff --git a/tests/codegen-llvm/simd/simd-wide-sum.rs b/tests/codegen-llvm/simd/simd-wide-sum.rs
new file mode 100644
index 00000000000..95117b2c748
--- /dev/null
+++ b/tests/codegen-llvm/simd/simd-wide-sum.rs
@@ -0,0 +1,59 @@
+//@ revisions: llvm mir-opt3
+//@ compile-flags: -C opt-level=3 -Z merge-functions=disabled
+//@ edition: 2021
+//@ only-x86_64
+//@[mir-opt3] compile-flags: -Zmir-opt-level=3
+//@[mir-opt3] build-pass
+
+// mir-opt3 is a regression test for https://github.com/rust-lang/rust/issues/98016
+
+#![crate_type = "lib"]
+#![feature(portable_simd)]
+
+use std::simd::prelude::*;
+const N: usize = 16;
+
+#[no_mangle]
+// CHECK-LABEL: @wider_reduce_simd
+pub fn wider_reduce_simd(x: Simd<u8, N>) -> u16 {
+    // CHECK: zext <16 x i8>
+    // CHECK-SAME: to <16 x i16>
+    // CHECK: call i16 @llvm.vector.reduce.add.v16i16(<16 x i16>
+    let x: Simd<u16, N> = x.cast();
+    x.reduce_sum()
+}
+
+#[no_mangle]
+// CHECK-LABEL: @wider_reduce_loop
+pub fn wider_reduce_loop(x: Simd<u8, N>) -> u16 {
+    // CHECK: zext <16 x i8>
+    // CHECK-SAME: to <16 x i16>
+    // CHECK: call i16 @llvm.vector.reduce.add.v16i16(<16 x i16>
+    let mut sum = 0_u16;
+    for i in 0..N {
+        sum += u16::from(x[i]);
+    }
+    sum
+}
+
+#[no_mangle]
+// CHECK-LABEL: @wider_reduce_iter
+pub fn wider_reduce_iter(x: Simd<u8, N>) -> u16 {
+    // CHECK: zext <16 x i8>
+    // CHECK-SAME: to <16 x i16>
+    // CHECK: call i16 @llvm.vector.reduce.add.v16i16(<16 x i16>
+    x.as_array().iter().copied().map(u16::from).sum()
+}
+
+// This last case is the most interesting, as it's the one
+// that used to fail to auto-vectorize due to a suboptimality in the
+// `<array::IntoIter as Iterator>::fold` implementation.
+
+#[no_mangle]
+// CHECK-LABEL: @wider_reduce_into_iter
+pub fn wider_reduce_into_iter(x: Simd<u8, N>) -> u16 {
+    // CHECK: zext <16 x i8>
+    // CHECK-SAME: to <16 x i16>
+    // CHECK: call i16 @llvm.vector.reduce.add.v16i16(<16 x i16>
+    x.to_array().into_iter().map(u16::from).sum()
+}
diff --git a/tests/codegen-llvm/simd/simd_arith_offset.rs b/tests/codegen-llvm/simd/simd_arith_offset.rs
new file mode 100644
index 00000000000..210b4e9bb50
--- /dev/null
+++ b/tests/codegen-llvm/simd/simd_arith_offset.rs
@@ -0,0 +1,22 @@
+//@ compile-flags: -C no-prepopulate-passes
+//@ only-64bit (because the i64 LLVM type for usize shows up)
+//
+
+#![crate_type = "lib"]
+#![feature(repr_simd, core_intrinsics)]
+
+#[path = "../../auxiliary/minisimd.rs"]
+mod minisimd;
+use std::intrinsics::simd::simd_arith_offset;
+
+use minisimd::*;
+
+/// A vector of *const T.
+pub type SimdConstPtr<T, const LANES: usize> = Simd<*const T, LANES>;
+
+// CHECK-LABEL: smoke
+#[no_mangle]
+pub fn smoke(ptrs: SimdConstPtr<u8, 8>, offsets: Simd<usize, 8>) -> SimdConstPtr<u8, 8> {
+    // CHECK: getelementptr i8, <8 x ptr> %0, <8 x i64> %1
+    unsafe { simd_arith_offset(ptrs, offsets) }
+}
diff --git a/tests/codegen-llvm/simd/swap-simd-types.rs b/tests/codegen-llvm/simd/swap-simd-types.rs
new file mode 100644
index 00000000000..c063cc683a6
--- /dev/null
+++ b/tests/codegen-llvm/simd/swap-simd-types.rs
@@ -0,0 +1,40 @@
+//@ compile-flags: -Copt-level=3 -C target-feature=+avx
+//@ only-x86_64
+
+#![crate_type = "lib"]
+
+use std::mem::swap;
+
+// SIMD types are highly aligned already, so make sure the swap code leaves their
+// types alone and doesn't pessimize them (such as by swapping them as `usize`s).
+extern crate core;
+use core::arch::x86_64::__m256;
+
+// CHECK-LABEL: @swap_single_m256
+#[no_mangle]
+pub fn swap_single_m256(x: &mut __m256, y: &mut __m256) {
+    // CHECK-NOT: alloca
+    // CHECK: load <8 x float>{{.+}}align 32
+    // CHECK: store <8 x float>{{.+}}align 32
+    swap(x, y)
+}
+
+// CHECK-LABEL: @swap_m256_slice
+#[no_mangle]
+pub fn swap_m256_slice(x: &mut [__m256], y: &mut [__m256]) {
+    // CHECK-NOT: alloca
+    // CHECK-COUNT-2: load <4 x i64>{{.+}}align 32
+    // CHECK-COUNT-2: store <4 x i64>{{.+}}align 32
+    if x.len() == y.len() {
+        x.swap_with_slice(y);
+    }
+}
+
+// CHECK-LABEL: @swap_bytes32
+#[no_mangle]
+pub fn swap_bytes32(x: &mut [u8; 32], y: &mut [u8; 32]) {
+    // CHECK-NOT: alloca
+    // CHECK-COUNT-2: load <4 x i64>{{.+}}align 1
+    // CHECK-COUNT-2: store <4 x i64>{{.+}}align 1
+    swap(x, y)
+}
diff --git a/tests/codegen-llvm/simd/unpadded-simd.rs b/tests/codegen-llvm/simd/unpadded-simd.rs
new file mode 100644
index 00000000000..ef067a15702
--- /dev/null
+++ b/tests/codegen-llvm/simd/unpadded-simd.rs
@@ -0,0 +1,19 @@
+// Make sure that no 0-sized padding is inserted in structs and that
+// structs are represented as expected by Neon intrinsics in LLVM.
+// See #87254.
+
+#![crate_type = "lib"]
+#![feature(repr_simd, abi_unadjusted)]
+
+#[derive(Copy, Clone)]
+#[repr(simd)]
+pub struct int16x4_t(pub [i16; 4]);
+
+#[derive(Copy, Clone)]
+pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t);
+
+// CHECK: %int16x4x2_t = type { <4 x i16>, <4 x i16> }
+#[no_mangle]
+extern "unadjusted" fn takes_int16x4x2_t(t: int16x4x2_t) -> int16x4x2_t {
+    t
+}
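The padding contrast that packed-simd.rs describes can also be seen directly in the type layouts. A minimal sketch, assuming the minisimd definitions sketched above and an x86-64 target (the full vector's exact alignment is target-dependent, which is why the sret CHECK line accepts "align (8|16)"):

    use core::mem::{align_of, size_of};

    fn layout_demo() {
        // Packed: three f32 lanes back to back, element alignment, no padding.
        assert_eq!(size_of::<PackedSimd<f32, 3>>(), 12);
        assert_eq!(align_of::<PackedSimd<f32, 3>>(), 4);

        // Full repr(simd): padded out like a 4-lane vector. The 16-byte size
        // here is what x86-64 typically gives; other targets may differ.
        assert_eq!(size_of::<FullSimd<f32, 3>>(), 16);
    }

This is why square_packed_full above needs the memcpy-based load helper before it can operate on whole vectors, while square_packed can load and store at element alignment.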
