diff options
| author | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2024-05-11 22:11:53 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-11 22:11:53 +0200 |
| commit | 893ba536bc929899fb4662ba24d9ed8f82a643cd (patch) | |
| tree | 8346e0a839c9fd6884c9a7dcc35bfc3444af3c6e | |
| parent | 7b50189dce74cf5b86cd928161138a2bd6c9305f (diff) | |
| parent | 4a4535a57cef0182d516888f1abb5d4a9ec84fdc (diff) | |
| download | rust-893ba536bc929899fb4662ba24d9ed8f82a643cd.tar.gz rust-893ba536bc929899fb4662ba24d9ed8f82a643cd.zip | |
Merge pull request #1491 from folkertdev/add-llvm-avx2-permd
add `llvm.x86.avx2.permd` intrinsic
| -rw-r--r-- | example/std_example.rs | 11 | ||||
| -rw-r--r-- | src/intrinsics/llvm_x86.rs | 15 |
2 files changed, 26 insertions, 0 deletions
```diff
diff --git a/example/std_example.rs b/example/std_example.rs
index 90d4ab721da..0e1004420dd 100644
--- a/example/std_example.rs
+++ b/example/std_example.rs
@@ -244,6 +244,7 @@ unsafe fn test_simd() {
 
     test_mm256_shuffle_epi8();
     test_mm256_permute2x128_si256();
+    test_mm256_permutevar8x32_epi32();
 
     #[rustfmt::skip]
     let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
@@ -447,6 +448,16 @@ unsafe fn test_mm256_permute2x128_si256() {
     assert_eq_m256i(r, e);
 }
 
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "avx2")]
+unsafe fn test_mm256_permutevar8x32_epi32() {
+    let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
+    let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+    let r = _mm256_setr_epi32(800, 700, 600, 500, 400, 300, 200, 100);
+    let e = _mm256_permutevar8x32_epi32(a, idx);
+    assert_eq_m256i(r, e);
+}
+
 fn test_checked_mul() {
     let u: Option<u8> = u8::from_str_radix("1000", 10).ok();
     assert_eq!(u, None);
diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs
index 71e06c73a37..75e9e16db55 100644
--- a/src/intrinsics/llvm_x86.rs
+++ b/src/intrinsics/llvm_x86.rs
@@ -374,6 +374,21 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
                 }
             }
         }
+        "llvm.x86.avx2.permd" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32
+            intrinsic_args!(fx, args => (a, idx); intrinsic);
+
+            for j in 0..=7 {
+                let index = idx.value_typed_lane(fx, fx.tcx.types.u32, j).load_scalar(fx);
+                let index = fx.bcx.ins().uextend(fx.pointer_type, index);
+                let value = a.value_lane_dyn(fx, index).load_scalar(fx);
+                ret.place_typed_lane(fx, fx.tcx.types.u32, j).to_ptr().store(
+                    fx,
+                    value,
+                    MemFlags::trusted(),
+                );
+            }
+        }
         "llvm.x86.avx2.vperm2i128"
         | "llvm.x86.avx.vperm2f128.ps.256"
         | "llvm.x86.avx.vperm2f128.pd.256" => {
```
