diff options
Diffstat (limited to 'tests/codegen/simd')
| -rw-r--r-- | tests/codegen/simd/aggregate-simd.rs | 102 | ||||
| -rw-r--r-- | tests/codegen/simd/extract-insert-dyn.rs | 121 | ||||
| -rw-r--r-- | tests/codegen/simd/packed-simd-alignment.rs | 44 | ||||
| -rw-r--r-- | tests/codegen/simd/packed-simd.rs | 56 | ||||
| -rw-r--r-- | tests/codegen/simd/simd-wide-sum.rs | 59 | ||||
| -rw-r--r-- | tests/codegen/simd/simd_arith_offset.rs | 22 | ||||
| -rw-r--r-- | tests/codegen/simd/swap-simd-types.rs | 40 | ||||
| -rw-r--r-- | tests/codegen/simd/unpadded-simd.rs | 19 |
8 files changed, 0 insertions, 463 deletions
diff --git a/tests/codegen/simd/aggregate-simd.rs b/tests/codegen/simd/aggregate-simd.rs deleted file mode 100644 index 57a301d634c..00000000000 --- a/tests/codegen/simd/aggregate-simd.rs +++ /dev/null @@ -1,102 +0,0 @@ -//@ compile-flags: -C opt-level=3 -C no-prepopulate-passes -//@ only-64bit - -#![feature(core_intrinsics, repr_simd)] -#![no_std] -#![crate_type = "lib"] - -#[path = "../../auxiliary/minisimd.rs"] -mod minisimd; -use core::intrinsics::simd::{simd_add, simd_extract}; - -use minisimd::*; - -#[repr(transparent)] -pub struct Transparent<T>(T); - -// These tests don't actually care about the add/extract, but it ensures the -// aggregated temporaries are only used in potentially-SSA ways. - -#[no_mangle] -pub fn simd_aggregate_pot(x: [u32; 4], y: [u32; 4]) -> u32 { - // CHECK-LABEL: simd_aggregate_pot - // CHECK: %a = load <4 x i32>, ptr %x, align 4 - // CHECK: %b = load <4 x i32>, ptr %y, align 4 - // CHECK: add <4 x i32> %a, %b - - unsafe { - let a = Simd(x); - let b = Simd(y); - let c = simd_add(a, b); - simd_extract(c, 1) - } -} - -#[no_mangle] -pub fn simd_aggregate_npot(x: [u32; 7], y: [u32; 7]) -> u32 { - // CHECK-LABEL: simd_aggregate_npot - // CHECK: %a = load <7 x i32>, ptr %x, align 4 - // CHECK: %b = load <7 x i32>, ptr %y, align 4 - // CHECK: add <7 x i32> %a, %b - - unsafe { - let a = Simd(x); - let b = Simd(y); - let c = simd_add(a, b); - simd_extract(c, 1) - } -} - -#[no_mangle] -pub fn packed_simd_aggregate_pot(x: [u32; 4], y: [u32; 4]) -> u32 { - // CHECK-LABEL: packed_simd_aggregate_pot - // CHECK: %a = load <4 x i32>, ptr %x, align 4 - // CHECK: %b = load <4 x i32>, ptr %y, align 4 - // CHECK: add <4 x i32> %a, %b - - unsafe { - let a = PackedSimd(x); - let b = PackedSimd(y); - let c = simd_add(a, b); - simd_extract(c, 1) - } -} - -#[no_mangle] -pub fn packed_simd_aggregate_npot(x: [u32; 7], y: [u32; 7]) -> u32 { - // CHECK-LABEL: packed_simd_aggregate_npot - // CHECK: %b = alloca [28 x i8], align 4 - // CHECK: %a = alloca [28 x i8], align 4 - // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %x, i64 28, i1 false) - // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %b, ptr align 4 %y, i64 28, i1 false) - // CHECK: %[[TEMPA:.+]] = load <7 x i32>, ptr %a, align 4 - // CHECK: %[[TEMPB:.+]] = load <7 x i32>, ptr %b, align 4 - // CHECK: add <7 x i32> %[[TEMPA]], %[[TEMPB]] - - unsafe { - let a = PackedSimd(x); - let b = PackedSimd(y); - let c = simd_add(a, b); - simd_extract(c, 1) - } -} - -#[no_mangle] -pub fn transparent_simd_aggregate(x: [u32; 4]) -> u32 { - // The transparent wrapper can just use the same SSA value as its field. - // No extra processing or spilling needed. - - // CHECK-LABEL: transparent_simd_aggregate - // CHECK-NOT: alloca - // CHECK: %[[RET:.+]] = alloca [4 x i8] - // CHECK-NOT: alloca - // CHECK: %a = load <4 x i32>, ptr %x, align 4 - // CHECK: %[[TEMP:.+]] = extractelement <4 x i32> %a, i32 1 - // CHECK: store i32 %[[TEMP]], ptr %[[RET]] - - unsafe { - let a = Simd(x); - let b = Transparent(a); - simd_extract(b.0, 1) - } -} diff --git a/tests/codegen/simd/extract-insert-dyn.rs b/tests/codegen/simd/extract-insert-dyn.rs deleted file mode 100644 index 729f0145314..00000000000 --- a/tests/codegen/simd/extract-insert-dyn.rs +++ /dev/null @@ -1,121 +0,0 @@ -//@compile-flags: -C opt-level=3 -C no-prepopulate-passes - -#![feature( - core_intrinsics, - repr_simd, - arm_target_feature, - mips_target_feature, - s390x_target_feature -)] -#![no_std] -#![crate_type = "lib"] -#![allow(non_camel_case_types)] - -// Test that `core::intrinsics::simd::{simd_extract_dyn, simd_insert_dyn}` -// lower to an LLVM extractelement or insertelement operation. - -use core::intrinsics::simd::{simd_extract, simd_extract_dyn, simd_insert, simd_insert_dyn}; - -#[repr(simd)] -#[derive(Clone, Copy)] -pub struct u32x16([u32; 16]); - -#[repr(simd)] -#[derive(Clone, Copy)] -pub struct i8x16([i8; 16]); - -// CHECK-LABEL: dyn_simd_extract -// CHECK: extractelement <16 x i8> %x, i32 %idx -#[no_mangle] -#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))] -#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))] -#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))] -#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))] -unsafe extern "C" fn dyn_simd_extract(x: i8x16, idx: u32) -> i8 { - simd_extract_dyn(x, idx) -} - -// CHECK-LABEL: literal_dyn_simd_extract -// CHECK: extractelement <16 x i8> %x, i32 7 -#[no_mangle] -#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))] -#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))] -#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))] -#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))] -unsafe extern "C" fn literal_dyn_simd_extract(x: i8x16) -> i8 { - simd_extract_dyn(x, 7) -} - -// CHECK-LABEL: const_dyn_simd_extract -// CHECK: extractelement <16 x i8> %x, i32 7 -#[no_mangle] -#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))] -#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))] -#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))] -#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))] -unsafe extern "C" fn const_dyn_simd_extract(x: i8x16) -> i8 { - simd_extract_dyn(x, const { 3 + 4 }) -} - -// CHECK-LABEL: const_simd_extract -// CHECK: extractelement <16 x i8> %x, i32 7 -#[no_mangle] -#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))] -#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))] -#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))] -#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))] -unsafe extern "C" fn const_simd_extract(x: i8x16) -> i8 { - simd_extract(x, const { 3 + 4 }) -} - -// CHECK-LABEL: dyn_simd_insert -// CHECK: insertelement <16 x i8> %x, i8 %e, i32 %idx -#[no_mangle] -#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))] -#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))] -#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))] -#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))] -unsafe extern "C" fn dyn_simd_insert(x: i8x16, e: i8, idx: u32) -> i8x16 { - simd_insert_dyn(x, idx, e) -} - -// CHECK-LABEL: literal_dyn_simd_insert -// CHECK: insertelement <16 x i8> %x, i8 %e, i32 7 -#[no_mangle] -#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))] -#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))] -#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))] -#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))] -unsafe extern "C" fn literal_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 { - simd_insert_dyn(x, 7, e) -} - -// CHECK-LABEL: const_dyn_simd_insert -// CHECK: insertelement <16 x i8> %x, i8 %e, i32 7 -#[no_mangle] -#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))] -#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))] -#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))] -#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))] -unsafe extern "C" fn const_dyn_simd_insert(x: i8x16, e: i8) -> i8x16 { - simd_insert_dyn(x, const { 3 + 4 }, e) -} - -// CHECK-LABEL: const_simd_insert -// CHECK: insertelement <16 x i8> %x, i8 %e, i32 7 -#[no_mangle] -#[cfg_attr(target_family = "wasm", target_feature(enable = "simd128"))] -#[cfg_attr(target_arch = "arm", target_feature(enable = "neon"))] -#[cfg_attr(target_arch = "x86", target_feature(enable = "sse"))] -#[cfg_attr(target_arch = "mips", target_feature(enable = "msa"))] -#[cfg_attr(target_arch = "s390x", target_feature(enable = "vector"))] -unsafe extern "C" fn const_simd_insert(x: i8x16, e: i8) -> i8x16 { - simd_insert(x, const { 3 + 4 }, e) -} diff --git a/tests/codegen/simd/packed-simd-alignment.rs b/tests/codegen/simd/packed-simd-alignment.rs deleted file mode 100644 index 53e88d8e5cf..00000000000 --- a/tests/codegen/simd/packed-simd-alignment.rs +++ /dev/null @@ -1,44 +0,0 @@ -//@ compile-flags: -Cno-prepopulate-passes - -#![crate_type = "lib"] -#![feature(repr_simd, core_intrinsics)] -// make sure that codegen emits correctly-aligned loads and stores for repr(packed, simd) types -// the alignment of a load should be no less than T, and no more than the size of the vector type -use std::intrinsics::simd as intrinsics; - -#[derive(Copy, Clone)] -#[repr(packed, simd)] -struct f32x3([f32; 3]); - -#[derive(Copy, Clone)] -#[repr(packed, simd)] -struct f32x4([f32; 4]); - -// CHECK-LABEL: load_f32x3 -#[no_mangle] -pub fn load_f32x3(floats: &f32x3) -> f32x3 { - // FIXME: Is a memcpy really the best we can do? - // CHECK: @llvm.memcpy.{{.*}}ptr align 4 {{.*}}ptr align 4 - *floats -} - -// CHECK-LABEL: load_f32x4 -#[no_mangle] -pub fn load_f32x4(floats: &f32x4) -> f32x4 { - // CHECK: load <4 x float>, ptr %{{[a-z0-9_]*}}, align {{4|8|16}} - *floats -} - -// CHECK-LABEL: add_f32x3 -#[no_mangle] -pub fn add_f32x3(x: f32x3, y: f32x3) -> f32x3 { - // CHECK: load <3 x float>, ptr %{{[a-z0-9_]*}}, align 4 - unsafe { intrinsics::simd_add(x, y) } -} - -// CHECK-LABEL: add_f32x4 -#[no_mangle] -pub fn add_f32x4(x: f32x4, y: f32x4) -> f32x4 { - // CHECK: load <4 x float>, ptr %{{[a-z0-9_]*}}, align {{4|8|16}} - unsafe { intrinsics::simd_add(x, y) } -} diff --git a/tests/codegen/simd/packed-simd.rs b/tests/codegen/simd/packed-simd.rs deleted file mode 100644 index 70c03fcc955..00000000000 --- a/tests/codegen/simd/packed-simd.rs +++ /dev/null @@ -1,56 +0,0 @@ -//@ revisions:opt3 noopt -//@ only-x86_64 -//@[opt3] compile-flags: -Copt-level=3 -//@[noopt] compile-flags: -Cno-prepopulate-passes - -#![crate_type = "lib"] -#![no_std] -#![feature(repr_simd, core_intrinsics)] -use core::intrinsics::simd as intrinsics; -use core::{mem, ptr}; - -#[path = "../../auxiliary/minisimd.rs"] -mod minisimd; -use minisimd::{PackedSimd, Simd as FullSimd}; - -// Test codegen for not only "packed" but also "fully aligned" SIMD types, and conversion between -// them. A repr(packed,simd) type with 3 elements can't exceed its element alignment, whereas the -// same type as repr(simd) will instead have padding. - -// non-powers-of-two have padding and need to be expanded to full vectors -fn load<T, const N: usize>(v: PackedSimd<T, N>) -> FullSimd<T, N> { - unsafe { - let mut tmp = mem::MaybeUninit::<FullSimd<T, N>>::uninit(); - ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1); - tmp.assume_init() - } -} - -// CHECK-LABEL: square_packed_full -// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]] -// CHECK-SAME: ptr{{[a-z_ ]*}} align 4 -#[no_mangle] -pub fn square_packed_full(x: PackedSimd<f32, 3>) -> FullSimd<f32, 3> { - // CHECK-NEXT: start - // noopt: alloca [[RET_TYPE]], [[RET_ALIGN]] - // CHECK: load <3 x float> - let x = load(x); - // CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float> - // CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]] - // CHECK-NEXT: ret void - unsafe { intrinsics::simd_mul(x, x) } -} - -// CHECK-LABEL: square_packed -// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align 4]]{{[^%]*}} [[RET_VREG:%[_0-9]*]] -// CHECK-SAME: ptr{{[a-z_ ]*}} align 4 -#[no_mangle] -pub fn square_packed(x: PackedSimd<f32, 3>) -> PackedSimd<f32, 3> { - // CHECK-NEXT: start - // CHECK-NEXT: load <3 x float> - // noopt-NEXT: load <3 x float> - // CHECK-NEXT: [[VREG:%[a-z0-9_]+]] = fmul <3 x float> - // CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]] - // CHECK-NEXT: ret void - unsafe { intrinsics::simd_mul(x, x) } -} diff --git a/tests/codegen/simd/simd-wide-sum.rs b/tests/codegen/simd/simd-wide-sum.rs deleted file mode 100644 index 95117b2c748..00000000000 --- a/tests/codegen/simd/simd-wide-sum.rs +++ /dev/null @@ -1,59 +0,0 @@ -//@ revisions: llvm mir-opt3 -//@ compile-flags: -C opt-level=3 -Z merge-functions=disabled -//@ edition: 2021 -//@ only-x86_64 -//@ [mir-opt3]compile-flags: -Zmir-opt-level=3 -//@ [mir-opt3]build-pass - -// mir-opt3 is a regression test for https://github.com/rust-lang/rust/issues/98016 - -#![crate_type = "lib"] -#![feature(portable_simd)] - -use std::simd::prelude::*; -const N: usize = 16; - -#[no_mangle] -// CHECK-LABEL: @wider_reduce_simd -pub fn wider_reduce_simd(x: Simd<u8, N>) -> u16 { - // CHECK: zext <16 x i8> - // CHECK-SAME: to <16 x i16> - // CHECK: call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> - let x: Simd<u16, N> = x.cast(); - x.reduce_sum() -} - -#[no_mangle] -// CHECK-LABEL: @wider_reduce_loop -pub fn wider_reduce_loop(x: Simd<u8, N>) -> u16 { - // CHECK: zext <16 x i8> - // CHECK-SAME: to <16 x i16> - // CHECK: call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> - let mut sum = 0_u16; - for i in 0..N { - sum += u16::from(x[i]); - } - sum -} - -#[no_mangle] -// CHECK-LABEL: @wider_reduce_iter -pub fn wider_reduce_iter(x: Simd<u8, N>) -> u16 { - // CHECK: zext <16 x i8> - // CHECK-SAME: to <16 x i16> - // CHECK: call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> - x.as_array().iter().copied().map(u16::from).sum() -} - -// This iterator one is the most interesting, as it's the one -// which used to not auto-vectorize due to a suboptimality in the -// `<array::IntoIter as Iterator>::fold` implementation. - -#[no_mangle] -// CHECK-LABEL: @wider_reduce_into_iter -pub fn wider_reduce_into_iter(x: Simd<u8, N>) -> u16 { - // CHECK: zext <16 x i8> - // CHECK-SAME: to <16 x i16> - // CHECK: call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> - x.to_array().into_iter().map(u16::from).sum() -} diff --git a/tests/codegen/simd/simd_arith_offset.rs b/tests/codegen/simd/simd_arith_offset.rs deleted file mode 100644 index 210b4e9bb50..00000000000 --- a/tests/codegen/simd/simd_arith_offset.rs +++ /dev/null @@ -1,22 +0,0 @@ -//@ compile-flags: -C no-prepopulate-passes -//@ only-64bit (because the LLVM type of i64 for usize shows up) -// - -#![crate_type = "lib"] -#![feature(repr_simd, core_intrinsics)] - -#[path = "../../auxiliary/minisimd.rs"] -mod minisimd; -use std::intrinsics::simd::simd_arith_offset; - -use minisimd::*; - -/// A vector of *const T. -pub type SimdConstPtr<T, const LANES: usize> = Simd<*const T, LANES>; - -// CHECK-LABEL: smoke -#[no_mangle] -pub fn smoke(ptrs: SimdConstPtr<u8, 8>, offsets: Simd<usize, 8>) -> SimdConstPtr<u8, 8> { - // CHECK: getelementptr i8, <8 x ptr> %0, <8 x i64> %1 - unsafe { simd_arith_offset(ptrs, offsets) } -} diff --git a/tests/codegen/simd/swap-simd-types.rs b/tests/codegen/simd/swap-simd-types.rs deleted file mode 100644 index c063cc683a6..00000000000 --- a/tests/codegen/simd/swap-simd-types.rs +++ /dev/null @@ -1,40 +0,0 @@ -//@ compile-flags: -Copt-level=3 -C target-feature=+avx -//@ only-x86_64 - -#![crate_type = "lib"] - -use std::mem::swap; - -// SIMD types are highly-aligned already, so make sure the swap code leaves their -// types alone and doesn't pessimize them (such as by swapping them as `usize`s). -extern crate core; -use core::arch::x86_64::__m256; - -// CHECK-LABEL: @swap_single_m256 -#[no_mangle] -pub fn swap_single_m256(x: &mut __m256, y: &mut __m256) { - // CHECK-NOT: alloca - // CHECK: load <8 x float>{{.+}}align 32 - // CHECK: store <8 x float>{{.+}}align 32 - swap(x, y) -} - -// CHECK-LABEL: @swap_m256_slice -#[no_mangle] -pub fn swap_m256_slice(x: &mut [__m256], y: &mut [__m256]) { - // CHECK-NOT: alloca - // CHECK-COUNT-2: load <4 x i64>{{.+}}align 32 - // CHECK-COUNT-2: store <4 x i64>{{.+}}align 32 - if x.len() == y.len() { - x.swap_with_slice(y); - } -} - -// CHECK-LABEL: @swap_bytes32 -#[no_mangle] -pub fn swap_bytes32(x: &mut [u8; 32], y: &mut [u8; 32]) { - // CHECK-NOT: alloca - // CHECK-COUNT-2: load <4 x i64>{{.+}}align 1 - // CHECK-COUNT-2: store <4 x i64>{{.+}}align 1 - swap(x, y) -} diff --git a/tests/codegen/simd/unpadded-simd.rs b/tests/codegen/simd/unpadded-simd.rs deleted file mode 100644 index ef067a15702..00000000000 --- a/tests/codegen/simd/unpadded-simd.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Make sure that no 0-sized padding is inserted in structs and that -// structs are represented as expected by Neon intrinsics in LLVM. -// See #87254. - -#![crate_type = "lib"] -#![feature(repr_simd, abi_unadjusted)] - -#[derive(Copy, Clone)] -#[repr(simd)] -pub struct int16x4_t(pub [i16; 4]); - -#[derive(Copy, Clone)] -pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t); - -// CHECK: %int16x4x2_t = type { <4 x i16>, <4 x i16> } -#[no_mangle] -extern "unadjusted" fn takes_int16x4x2_t(t: int16x4x2_t) -> int16x4x2_t { - t -} |
