diff options
| author | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2023-11-24 19:38:13 +0000 |
|---|---|---|
| committer | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2023-11-24 20:45:22 +0100 |
| commit | 3b49b9efd5e91b3104bc377ab64ee88aaa3ce31d (patch) | |
| tree | 01fa0b2c267bba9844545dc5d16351a7a4d9ff73 | |
| parent | d5a7ae7976ec02196ec9893f79fa06612059dfbc (diff) | |
| download | rust-3b49b9efd5e91b3104bc377ab64ee88aaa3ce31d.tar.gz rust-3b49b9efd5e91b3104bc377ab64ee88aaa3ce31d.zip | |
Implement the int part of the gather family vendor intrinsics
| -rw-r--r-- | src/intrinsics/llvm_x86.rs | 22 |
1 file changed, 16 insertions, 6 deletions
diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs
index 6cccc8b8396..2108b4bb9ff 100644
--- a/src/intrinsics/llvm_x86.rs
+++ b/src/intrinsics/llvm_x86.rs
@@ -74,12 +74,20 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             ret.write_cvalue(fx, val);
         }
 
-        "llvm.x86.avx2.gather.d.ps"
+        "llvm.x86.avx2.gather.d.d"
+        | "llvm.x86.avx2.gather.d.q"
+        | "llvm.x86.avx2.gather.d.ps"
         | "llvm.x86.avx2.gather.d.pd"
+        | "llvm.x86.avx2.gather.d.d.256"
+        | "llvm.x86.avx2.gather.d.q.256"
         | "llvm.x86.avx2.gather.d.ps.256"
         | "llvm.x86.avx2.gather.d.pd.256"
+        | "llvm.x86.avx2.gather.q.d"
+        | "llvm.x86.avx2.gather.q.q"
         | "llvm.x86.avx2.gather.q.ps"
         | "llvm.x86.avx2.gather.q.pd"
+        | "llvm.x86.avx2.gather.q.d.256"
+        | "llvm.x86.avx2.gather.q.q.256"
         | "llvm.x86.avx2.gather.q.ps.256"
         | "llvm.x86.avx2.gather.q.pd.256" => {
             // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd&ig_expand=3818
@@ -94,10 +102,8 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             let (index_lane_count, index_lane_ty) = index.layout().ty.simd_size_and_type(fx.tcx);
             let (mask_lane_count, mask_lane_ty) = mask.layout().ty.simd_size_and_type(fx.tcx);
             let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert!(src_lane_ty.is_floating_point());
+            assert_eq!(src_lane_ty, ret_lane_ty);
             assert!(index_lane_ty.is_integral());
-            assert!(mask_lane_ty.is_floating_point());
-            assert!(ret_lane_ty.is_floating_point());
             assert_eq!(src_lane_count, mask_lane_count);
             assert_eq!(src_lane_count, ret_lane_count);
 
@@ -122,8 +128,12 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
                 let res_lane = fx.bcx.append_block_param(next, lane_clif_ty);
 
                 let mask_lane = match mask_lane_clif_ty {
-                    types::F32 => fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64),
-                    types::F64 => fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64),
+                    types::I32 | types::F32 => {
+                        fx.bcx.ins().band_imm(mask_lane, 0x8000_0000u64 as i64)
+                    }
+                    types::I64 | types::F64 => {
+                        fx.bcx.ins().band_imm(mask_lane, 0x8000_0000_0000_0000u64 as i64)
+                    }
                     _ => unreachable!(),
                 };
                 fx.bcx.ins().brif(mask_lane, if_enabled, &[], if_disabled, &[]);
