about summary refs log tree commit diff
diff options
context:
space:
mode:
author Folkert <folkert@folkertdev.nl> 2024-05-11 21:16:38 +0200
committer Folkert <folkert@folkertdev.nl> 2024-05-11 21:58:25 +0200
commit 4a4535a57cef0182d516888f1abb5d4a9ec84fdc (patch)
tree a82dac497730b516281d15be59e621dfa23d9e87
parent 9e4e8054882de9e0f9496e415eecd5205a52f0e7 (diff)
downloadrust-4a4535a57cef0182d516888f1abb5d4a9ec84fdc.tar.gz
rust-4a4535a57cef0182d516888f1abb5d4a9ec84fdc.zip
add `llvm.x86.avx2.permd` intrinsic
-rw-r--r-- example/std_example.rs 11
-rw-r--r-- src/intrinsics/llvm_x86.rs 15
2 files changed, 26 insertions, 0 deletions
diff --git a/example/std_example.rs b/example/std_example.rs
index 90d4ab721da..0e1004420dd 100644
--- a/example/std_example.rs
+++ b/example/std_example.rs
@@ -244,6 +244,7 @@ unsafe fn test_simd() {
 
     test_mm256_shuffle_epi8();
     test_mm256_permute2x128_si256();
+    test_mm256_permutevar8x32_epi32();
 
     #[rustfmt::skip]
     let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
@@ -447,6 +448,16 @@ unsafe fn test_mm256_permute2x128_si256() {
     assert_eq_m256i(r, e);
 }
 
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "avx2")]
+unsafe fn test_mm256_permutevar8x32_epi32() {
+    // An index vector of 7, 6, ..., 0 must yield the source lanes in reverse order.
+    let src = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
+    let lane_order = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
+    let expected = _mm256_setr_epi32(800, 700, 600, 500, 400, 300, 200, 100);
+    assert_eq_m256i(expected, _mm256_permutevar8x32_epi32(src, lane_order));
+}
+
 fn test_checked_mul() {
     let u: Option<u8> = u8::from_str_radix("1000", 10).ok();
     assert_eq!(u, None);
diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs
index 8df83c706a1..f0c48884745 100644
--- a/src/intrinsics/llvm_x86.rs
+++ b/src/intrinsics/llvm_x86.rs
@@ -374,6 +374,21 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
                 }
             }
         }
+        "llvm.x86.avx2.permd" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32
+            intrinsic_args!(fx, args => (a, idx); intrinsic);
+
+            for j in 0..=7 {
+                let index = idx.value_typed_lane(fx, fx.tcx.types.u32, j).load_scalar(fx);
+                // Only the low 3 bits of each index select a lane (`idx[i+2:i]` in Intel's
+                // pseudocode); masking keeps the index given to `value_lane_dyn` in 0..=7.
+                let index = fx.bcx.ins().band_imm(index, 7);
+                let index = fx.bcx.ins().uextend(fx.pointer_type, index);
+                let value = a.value_lane_dyn(fx, index).load_scalar(fx);
+                let dest = ret.place_typed_lane(fx, fx.tcx.types.u32, j).to_ptr();
+                dest.store(fx, value, MemFlags::trusted());
+            }
+        }
         "llvm.x86.avx2.vperm2i128"
         | "llvm.x86.avx.vperm2f128.ps.256"
         | "llvm.x86.avx.vperm2f128.pd.256" => {