about summary refs log tree commit diff
diff options
context:
space:
mode:
author: bjorn3 <17426603+bjorn3@users.noreply.github.com>, 2023-11-24 19:38:13 +0000
committer: bjorn3 <17426603+bjorn3@users.noreply.github.com>, 2023-11-24 20:45:22 +0100
commit: 3b49b9efd5e91b3104bc377ab64ee88aaa3ce31d (patch)
tree: 01fa0b2c267bba9844545dc5d16351a7a4d9ff73
parent: d5a7ae7976ec02196ec9893f79fa06612059dfbc (diff)
download: rust-3b49b9efd5e91b3104bc377ab64ee88aaa3ce31d.tar.gz
download: rust-3b49b9efd5e91b3104bc377ab64ee88aaa3ce31d.zip
Implement the int part of the gather family vendor intrinsics
-rw-r--r-- src/intrinsics/llvm_x86.rs | 22
1 file changed, 16 insertions, 6 deletions
diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs
index 6cccc8b8396..2108b4bb9ff 100644
--- a/src/intrinsics/llvm_x86.rs
+++ b/src/intrinsics/llvm_x86.rs
@@ -74,12 +74,20 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             ret.write_cvalue(fx, val);
         }
 
-        "llvm.x86.avx2.gather.d.ps"
+        "llvm.x86.avx2.gather.d.d"
+        | "llvm.x86.avx2.gather.d.q"
+        | "llvm.x86.avx2.gather.d.ps"
         | "llvm.x86.avx2.gather.d.pd"
+        | "llvm.x86.avx2.gather.d.d.256"
+        | "llvm.x86.avx2.gather.d.q.256"
         | "llvm.x86.avx2.gather.d.ps.256"
         | "llvm.x86.avx2.gather.d.pd.256"
+        | "llvm.x86.avx2.gather.q.d"
+        | "llvm.x86.avx2.gather.q.q"
         | "llvm.x86.avx2.gather.q.ps"
         | "llvm.x86.avx2.gather.q.pd"
+        | "llvm.x86.avx2.gather.q.d.256"
+        | "llvm.x86.avx2.gather.q.q.256"
         | "llvm.x86.avx2.gather.q.ps.256"
         | "llvm.x86.avx2.gather.q.pd.256" => {
             // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd&ig_expand=3818
@@ -94,10 +102,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             let (index_lane_count, index_lane_ty) = index.layout().ty.simd_size_and_type(fx.tcx);
             let (mask_lane_count, mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
             let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert!(src_lane_ty.is_floating_point());
+            assert_eq!(src_lane_ty, ret_lane_ty);
             assert!(index_lane_ty.is_integral());
-            assert!(mask_lane_ty.is_floating_point());
-            assert!(ret_lane_ty.is_floating_point());
             assert_eq!(src_lane_count, mask_lane_count);
             assert_eq!(src_lane_count, ret_lane_count);
 
@@ -122,8 +128,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
                 let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
 
                 let mask_lane = match mask_lane_clif_ty {
-                    types::F32 => fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64),
-                    types::F64 => fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64),
+                    types::I32 | types::F32 => {
+                        fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64)
+                    }
+                    types::I64 | types::F64 => {
+                        fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64)
+                    }
                     _ => unreachable!(),
                 };
                 fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);