diff options
| author | Matthias Krüger <matthias.krueger@famsik.de> | 2024-06-15 14:40:48 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-06-15 14:40:48 +0200 |
| commit | 0f2cc21547cda91e515dc934d3d325088b043655 (patch) | |
| tree | d7bd6f7181a2ded586b0376fda6b4507e1d34f4b /compiler/rustc_codegen_llvm/src | |
| parent | dad74aa67c349b27468c4d1da93500175a105835 (diff) | |
| parent | dfc55145270c5ec8de1bcd19ae05f056100108a5 (diff) | |
| download | rust-0f2cc21547cda91e515dc934d3d325088b043655.tar.gz rust-0f2cc21547cda91e515dc934d3d325088b043655.zip | |
Rollup merge of #126417 - beetrees:f16-f128-inline-asm-x86, r=Amanieu
Add `f16` and `f128` inline ASM support for `x86` and `x86-64` This PR adds `f16` and `f128` input and output support to inline ASM on `x86` and `x86-64`. `f16` vector sizes are taken from [here](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html). Relevant issue: #125398 Tracking issue: #116909 ``@rustbot`` label +F-f16_and_f128
Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/asm.rs | 100 |
1 files changed, 100 insertions, 0 deletions
diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs index db28c6857b7..60e63b956db 100644 --- a/compiler/rustc_codegen_llvm/src/asm.rs +++ b/compiler/rustc_codegen_llvm/src/asm.rs @@ -960,6 +960,43 @@ fn llvm_fixup_input<'ll, 'tcx>( Abi::Vector { .. }, ) if layout.size.bytes() == 64 => bx.bitcast(value, bx.cx.type_vector(bx.cx.type_f64(), 8)), ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if bx.sess().asm_arch == Some(InlineAsmArch::X86) + && s.primitive() == Primitive::Float(Float::F128) => + { + bx.bitcast(value, bx.type_vector(bx.type_i32(), 4)) + } + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if s.primitive() == Primitive::Float(Float::F16) => { + let value = bx.insert_element( + bx.const_undef(bx.type_vector(bx.type_f16(), 8)), + value, + bx.const_usize(0), + ); + bx.bitcast(value, bx.type_vector(bx.type_i16(), 8)) + } + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Vector { element, count: count @ (8 | 16) }, + ) if element.primitive() == Primitive::Float(Float::F16) => { + bx.bitcast(value, bx.type_vector(bx.type_i16(), count)) + } + ( InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16), Abi::Scalar(s), ) => { @@ -1037,6 +1074,39 @@ fn llvm_fixup_output<'ll, 'tcx>( Abi::Vector { .. }, ) if layout.size.bytes() == 64 => bx.bitcast(value, layout.llvm_type(bx.cx)), ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if bx.sess().asm_arch == Some(InlineAsmArch::X86) + && s.primitive() == Primitive::Float(Float::F128) => + { + bx.bitcast(value, bx.type_f128()) + } + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if s.primitive() == Primitive::Float(Float::F16) => { + let value = bx.bitcast(value, bx.type_vector(bx.type_f16(), 8)); + bx.extract_element(value, bx.const_usize(0)) + } + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Vector { element, count: count @ (8 | 16) }, + ) if element.primitive() == Primitive::Float(Float::F16) => { + bx.bitcast(value, bx.type_vector(bx.type_f16(), count)) + } + ( InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16), Abi::Scalar(s), ) => { @@ -1110,6 +1180,36 @@ fn llvm_fixup_output_type<'ll, 'tcx>( Abi::Vector { .. }, ) if layout.size.bytes() == 64 => cx.type_vector(cx.type_f64(), 8), ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if cx.sess().asm_arch == Some(InlineAsmArch::X86) + && s.primitive() == Primitive::Float(Float::F128) => + { + cx.type_vector(cx.type_i32(), 4) + } + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Scalar(s), + ) if s.primitive() == Primitive::Float(Float::F16) => cx.type_vector(cx.type_i16(), 8), + ( + InlineAsmRegClass::X86( + X86InlineAsmRegClass::xmm_reg + | X86InlineAsmRegClass::ymm_reg + | X86InlineAsmRegClass::zmm_reg, + ), + Abi::Vector { element, count: count @ (8 | 16) }, + ) if element.primitive() == Primitive::Float(Float::F16) => { + cx.type_vector(cx.type_i16(), count) + } + ( InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16), Abi::Scalar(s), ) => { |
