Diffstat (limited to 'compiler/rustc_codegen_cranelift/example')
-rw-r--r--  compiler/rustc_codegen_cranelift/example/neon.rs         | 43
-rw-r--r--  compiler/rustc_codegen_cranelift/example/std_example.rs  | 28
2 files changed, 71 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_cranelift/example/neon.rs b/compiler/rustc_codegen_cranelift/example/neon.rs
index 00e437d7e54..69ce17d3d75 100644
--- a/compiler/rustc_codegen_cranelift/example/neon.rs
+++ b/compiler/rustc_codegen_cranelift/example/neon.rs
@@ -203,6 +203,44 @@ unsafe fn test_vqadd_u8() {
 }
 
 #[cfg(target_arch = "aarch64")]
+unsafe fn test_vmaxq_f32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmax.v4f32
+    let a = f32x4::from([0., -1., 2., -3.]);
+    let b = f32x4::from([-4., 5., -6., 7.]);
+    let e = f32x4::from([0., 5., 2., 7.]);
+    let r: f32x4 = transmute(vmaxq_f32(transmute(a), transmute(b)));
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vminq_f32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmin.v4f32
+    let a = f32x4::from([0., -1., 2., -3.]);
+    let b = f32x4::from([-4., 5., -6., 7.]);
+    let e = f32x4::from([-4., -1., -6., -3.]);
+    let r: f32x4 = transmute(vminq_f32(transmute(a), transmute(b)));
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vaddvq_f32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.neon.faddv.f32.v4f32
+    let a = f32x4::from([0., 1., 2., 3.]);
+    let e = 6f32;
+    let r = vaddvq_f32(transmute(a));
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
+unsafe fn test_vrndnq_f32() {
+    // AArch64 llvm intrinsic: llvm.aarch64.neon.frintn.v4f32
+    let a = f32x4::from([0.1, -1.9, 4.5, 5.5]);
+    let e = f32x4::from([0., -2., 4., 6.]);
+    let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
+    assert_eq!(r, e);
+}
+
+#[cfg(target_arch = "aarch64")]
 fn main() {
     unsafe {
         test_vpmin_s8();
@@ -229,6 +267,11 @@ fn main() {
 
         test_vqsub_u8();
         test_vqadd_u8();
+
+        test_vmaxq_f32();
+        test_vminq_f32();
+        test_vaddvq_f32();
+        test_vrndnq_f32();
     }
 }
 
diff --git a/compiler/rustc_codegen_cranelift/example/std_example.rs b/compiler/rustc_codegen_cranelift/example/std_example.rs
index 602452d8276..ebaa9c69c81 100644
--- a/compiler/rustc_codegen_cranelift/example/std_example.rs
+++ b/compiler/rustc_codegen_cranelift/example/std_example.rs
@@ -258,6 +258,9 @@ unsafe fn test_simd() {
     test_mm_insert_epi16();
     test_mm_shuffle_epi8();
 
+    #[cfg(not(jit))]
+    test_mm_cmpestri();
+
     test_mm256_shuffle_epi8();
     test_mm256_permute2x128_si256();
     test_mm256_permutevar8x32_epi32();
@@ -429,6 +432,31 @@ unsafe fn test_mm_shuffle_epi8() {
     assert_eq_m128i(r, expected);
 }
 
+// Currently one cannot `load` a &[u8] that is less than 16
+// in length. This makes loading strings less than 16 in length
+// a bit difficult. Rather than `load` and mutate the __m128i,
+// it is easier to memcpy the given string to a local slice with
+// length 16 and `load` the local slice.
+#[cfg(not(jit))]
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "sse4.2")]
+unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
+    assert!(s.len() <= 16);
+    let slice = &mut [0u8; 16];
+    std::ptr::copy_nonoverlapping(s.as_ptr(), slice.as_mut_ptr(), s.len());
+    _mm_loadu_si128(slice.as_ptr() as *const _)
+}
+
+#[cfg(not(jit))]
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "sse4.2")]
+unsafe fn test_mm_cmpestri() {
+    let a = str_to_m128i(b"bar - garbage");
+    let b = str_to_m128i(b"foobar");
+    let i = _mm_cmpestri::<_SIDD_CMP_EQUAL_ORDERED>(a, 3, b, 6);
+    assert_eq!(3, i);
+}
+
 #[cfg(target_arch = "x86_64")]
 #[target_feature(enable = "avx2")]
 unsafe fn test_mm256_shuffle_epi8() {
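The neon.rs tests above call the NEON intrinsics through a local f32x4 helper type and transmute. For reference, the same lane-wise semantics can be exercised directly against std's AArch64 intrinsics; the sketch below is not part of the patch, assumes an aarch64 target, and uses a hypothetical function name neon_semantics_sketch with core::arch::aarch64's float32x4_t type.

// Standalone sketch (not from the patch): the checks above via std's NEON
// intrinsics, using float32x4_t instead of the example's local f32x4 type.
#[cfg(target_arch = "aarch64")]
fn neon_semantics_sketch() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1q_f32([0.0f32, -1.0, 2.0, -3.0].as_ptr());
        let b = vld1q_f32([-4.0f32, 5.0, -6.0, 7.0].as_ptr());

        // vmaxq_f32 / vminq_f32 are lane-wise maximum and minimum.
        let max = vmaxq_f32(a, b);
        assert_eq!(vgetq_lane_f32::<1>(max), 5.0);
        let min = vminq_f32(a, b);
        assert_eq!(vgetq_lane_f32::<0>(min), -4.0);

        // vaddvq_f32 adds all four lanes into one scalar.
        assert_eq!(vaddvq_f32(vld1q_f32([0.0f32, 1.0, 2.0, 3.0].as_ptr())), 6.0);

        // vrndnq_f32 rounds to nearest, ties to even: 4.5 -> 4.0, 5.5 -> 6.0.
        let r = vrndnq_f32(vld1q_f32([0.1f32, -1.9, 4.5, 5.5].as_ptr()));
        assert_eq!(vgetq_lane_f32::<2>(r), 4.0);
        assert_eq!(vgetq_lane_f32::<3>(r), 6.0);
    }
}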
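The str_to_m128i helper in std_example.rs works around _mm_loadu_si128 always reading a full 16 bytes: the short byte string is copied into a zero-padded 16-byte buffer first, and only the explicit length arguments of _mm_cmpestri limit how much of each operand is compared. A minimal standalone sketch of the same idea follows; it is not part of the patch, and the names load_padded and cmpestri_demo are hypothetical.

#[cfg(target_arch = "x86_64")]
fn cmpestri_demo() {
    use core::arch::x86_64::*;

    // Copy a short byte string into a zero-padded 16-byte buffer so the
    // full 16-byte unaligned load never reads past the original slice.
    fn load_padded(s: &[u8]) -> __m128i {
        assert!(s.len() <= 16);
        let mut buf = [0u8; 16];
        buf[..s.len()].copy_from_slice(s);
        unsafe { _mm_loadu_si128(buf.as_ptr() as *const __m128i) }
    }

    if is_x86_feature_detected!("sse4.2") {
        let needle = load_padded(b"bar");
        let hay = load_padded(b"foobar");
        // _SIDD_CMP_EQUAL_ORDERED searches for `needle` (length 3) as a
        // substring of `hay` (length 6) and returns the index of the first
        // match, here 3.
        let idx = unsafe { _mm_cmpestri::<_SIDD_CMP_EQUAL_ORDERED>(needle, 3, hay, 6) };
        assert_eq!(idx, 3);
    }
}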
