diff options
| author | bors <bors@rust-lang.org> | 2023-09-06 22:26:37 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2023-09-06 22:26:37 +0000 |
| commit | d4f12a5ed3e574b911965f0c2323e1008ee87afd (patch) | |
| tree | cfdca527b7ab9826c2e27546e397f038925046e0 | |
| parent | 61a0b77e17531e6d65d8fad80588237ee5e14529 (diff) | |
| parent | 4cd51770154a0fb0cddd1e94acd34bf7e559f03c (diff) | |
| download | rust-d4f12a5ed3e574b911965f0c2323e1008ee87afd.tar.gz rust-d4f12a5ed3e574b911965f0c2323e1008ee87afd.zip | |
Auto merge of #115580 - eduardosm:stdarch-intrinsics, r=davidtwco,bjorn3
Update stdarch submodule and remove special handling in cranelift codegen for some AVX and SSE2 LLVM intrinsics
https://github.com/rust-lang/stdarch/pull/1463 reimplemented some x86 intrinsics to avoid using some x86-specific LLVM intrinsics:
* Store unaligned (`_mm*_storeu_*`) use `<*mut _>::write_unaligned` instead of `llvm.x86.*.storeu.*`.
* Shift by immediate (`_mm*_s{ll,rl,ra}i_epi*`) use `if` (srl, sll) or `min` (sra) to simulate the behaviour when the RHS is out of range. RHS is constant, so the `if`/`min` will be optimized away.
This PR updates the stdarch submodule to pull these changes and removes special handling for those LLVM intrinsics from cranelift codegen. I left gcc codegen untouched because there are some autogenerated lists.
| -rw-r--r-- | src/intrinsics/llvm_x86.rs | 248 |
1 files changed, 0 insertions, 248 deletions
diff --git a/src/intrinsics/llvm_x86.rs b/src/intrinsics/llvm_x86.rs index fdd27a454e0..e62de6b6147 100644 --- a/src/intrinsics/llvm_x86.rs +++ b/src/intrinsics/llvm_x86.rs @@ -177,244 +177,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( bool_to_zero_or_max_uint(fx, res_lane_ty, res_lane) }); } - "llvm.x86.sse2.psrli.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.psrli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.psrai.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.psrai.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().sshr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.pslli.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.pslli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.psrli.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.psrli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.psrai.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.psrai.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().sshr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.pslli.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.sse2.pslli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx.psrli.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.psrli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx.psrai.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.psrai.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().sshr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.psrli.q" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.psrli.q imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 64 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.sse2.pslli.q" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.pslli.q imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 64 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx.pslli.d" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.pslli.d imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 32 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx2.psrli.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.psrli.w imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().ushr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx2.psrai.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.psrai.w imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().sshr_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } - "llvm.x86.avx2.pslli.w" => { - let (a, imm8) = match args { - [a, imm8] => (a, imm8), - _ => bug!("wrong number of args for intrinsic {intrinsic}"), - }; - let a = codegen_operand(fx, a); - let imm8 = crate::constant::mir_operand_get_const_val(fx, imm8) - .expect("llvm.x86.avx.pslli.w imm8 not const"); - - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| match imm8 - .try_to_bits(Size::from_bytes(4)) - .unwrap_or_else(|| panic!("imm8 not scalar: {:?}", imm8)) - { - imm8 if imm8 < 16 => fx.bcx.ins().ishl_imm(lane, i64::from(imm8 as u8)), - _ => fx.bcx.ins().iconst(types::I32, 0), - }); - } "llvm.x86.ssse3.pshuf.b.128" | "llvm.x86.avx2.pshuf.b" => { let (a, b) = match args { [a, b] => (a, b), @@ -506,14 +268,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( ret.place_lane(fx, 2).to_ptr().store(fx, res_2, MemFlags::trusted()); ret.place_lane(fx, 3).to_ptr().store(fx, res_3, MemFlags::trusted()); } - "llvm.x86.sse2.storeu.dq" | "llvm.x86.sse2.storeu.pd" => { - intrinsic_args!(fx, args => (mem_addr, a); intrinsic); - let mem_addr = mem_addr.load_scalar(fx); - - // FIXME correctly handle the unalignment - let dest = CPlace::for_ptr(Pointer::new(mem_addr), a.layout()); - dest.write_cvalue(fx, a); - } "llvm.x86.ssse3.pabs.b.128" | "llvm.x86.ssse3.pabs.w.128" | "llvm.x86.ssse3.pabs.d.128" => { let a = match args { [a] => a, @@ -571,8 +325,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( // llvm.x86.avx2.vperm2i128 // llvm.x86.ssse3.pshuf.b.128 // llvm.x86.avx2.pshuf.b -// llvm.x86.avx2.psrli.w -// llvm.x86.sse2.psrli.w fn llvm_add_sub<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, |
