| field | value | date |
|---|---|---|
| author | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2024-03-08 20:41:29 +0000 |
| committer | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2024-03-08 20:41:29 +0000 |
| commit | 8fb8b087164e8271240f3114984fa53151e282fb | |
| tree | f891d1b3d7cff36dba67e14420e15fafd5ad5c9f /compiler/rustc_codegen_cranelift/src | |
| parent | a655e648a9f94d74263108366b83e677af56e35d | |
| parent | 54cbb6e7531f95e086d5c3dd0d5e73bfbe3545ba | |
Merge commit '54cbb6e7531f95e086d5c3dd0d5e73bfbe3545ba' into sync_cg_clif-2024-03-08
Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
5 files changed, 205 insertions, 8 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/common.rs b/compiler/rustc_codegen_cranelift/src/common.rs
index e35ec4fe1c7..7e29d407a1f 100644
--- a/compiler/rustc_codegen_cranelift/src/common.rs
+++ b/compiler/rustc_codegen_cranelift/src/common.rs
@@ -392,18 +392,25 @@ impl<'tcx> FunctionCx<'_, '_, 'tcx> {
     }
 
     pub(crate) fn create_stack_slot(&mut self, size: u32, align: u32) -> Pointer {
-        if align <= 16 {
+        let abi_align = if self.tcx.sess.target.arch == "s390x" { 8 } else { 16 };
+        if align <= abi_align {
             let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
                 kind: StackSlotKind::ExplicitSlot,
-                // FIXME Don't force the size to a multiple of 16 bytes once Cranelift gets a way to
-                // specify stack slot alignment.
-                size: (size + 15) / 16 * 16,
+                // FIXME Don't force the size to a multiple of <abi_align> bytes once Cranelift gets
+                // a way to specify stack slot alignment.
+                size: (size + abi_align - 1) / abi_align * abi_align,
             });
             Pointer::stack_slot(stack_slot)
         } else {
             // Alignment is too big to handle using the above hack. Dynamically realign a stack slot
             // instead. This wastes some space for the realignment.
-            let base_ptr = self.create_stack_slot(size + align, 16).get_addr(self);
+            let stack_slot = self.bcx.create_sized_stack_slot(StackSlotData {
+                kind: StackSlotKind::ExplicitSlot,
+                // FIXME Don't force the size to a multiple of <abi_align> bytes once Cranelift gets
+                // a way to specify stack slot alignment.
+                size: (size + align) / abi_align * abi_align,
+            });
+            let base_ptr = self.bcx.ins().stack_addr(self.pointer_type, stack_slot, 0);
             let misalign_offset = self.bcx.ins().urem_imm(base_ptr, i64::from(align));
             let realign_offset = self.bcx.ins().irsub_imm(misalign_offset, i64::from(align));
             Pointer::new(self.bcx.ins().iadd(base_ptr, realign_offset))
diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs
index b6de688130c..18c5960ffc6 100644
--- a/compiler/rustc_codegen_cranelift/src/constant.rs
+++ b/compiler/rustc_codegen_cranelift/src/constant.rs
@@ -372,7 +372,13 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
     }
 
     let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
-    data.define(bytes.into_boxed_slice());
+    if bytes.is_empty() {
+        // FIXME(bytecodealliance/wasmtime#7918) cranelift-jit has a bug where it causes UB on
+        // empty data objects
+        data.define(Box::new([0]));
+    } else {
+        data.define(bytes.into_boxed_slice());
+    }
 
     for &(offset, prov) in alloc.provenance().ptrs().iter() {
         let alloc_id = prov.alloc_id();
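A note on the realignment arithmetic in the common.rs hunk above: the slot is over-allocated by `align` bytes, and the base address is then rounded up to the next multiple of `align` using `urem_imm`, `irsub_imm` (reverse subtraction: `imm - x`), and `iadd`. A minimal standalone sketch of the same computation, assuming `align` is a nonzero power of two (`realign` is a hypothetical helper, not backend code):

```rust
// urem_imm(x, imm)  computes x % imm
// irsub_imm(x, imm) computes imm - x (reverse subtraction)
// iadd(x, y)        computes x + y
fn realign(base_ptr: u64, align: u64) -> u64 {
    let misalign_offset = base_ptr % align;       // urem_imm
    let realign_offset = align - misalign_offset; // irsub_imm
    base_ptr + realign_offset                     // iadd
}

fn main() {
    // The realigned pointer is aligned and, because size + align bytes were
    // reserved, `size` bytes starting at it still fit inside the slot.
    assert_eq!(realign(0x1003, 16), 0x1010);
    // An already aligned base is still bumped by a full `align`, which is
    // the wasted space the comment in the diff mentions.
    assert_eq!(realign(0x1000, 16), 0x1010);
}
```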
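The constant.rs hunk avoids handing cranelift-jit a zero-length data object (bytecodealliance/wasmtime#7918). A sketch of the pattern in isolation (`definable_bytes` is a hypothetical free function, not the actual `DataDescription` API):

```rust
// Pad an empty allocation to a single zero byte so the JIT never defines an
// empty data object. The padding byte is presumably never read, since the
// allocation's logical length is still zero.
fn definable_bytes(bytes: Vec<u8>) -> Box<[u8]> {
    if bytes.is_empty() { Box::new([0]) } else { bytes.into_boxed_slice() }
}
```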
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
index 2e3e7ce986b..1615dc5de69 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs
@@ -170,6 +170,65 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             }
         }
 
+        "llvm.x86.sse.add.ss" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss&ig_expand=171
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            assert_eq!(a.layout(), ret.layout());
+            let layout = a.layout();
+
+            let (_, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            assert!(lane_ty.is_floating_point());
+            let ret_lane_layout = fx.layout_of(lane_ty);
+
+            ret.write_cvalue(fx, a);
+
+            let a_lane = a.value_lane(fx, 0).load_scalar(fx);
+            let b_lane = b.value_lane(fx, 0).load_scalar(fx);
+
+            let res = fx.bcx.ins().fadd(a_lane, b_lane);
+
+            let res_lane = CValue::by_val(res, ret_lane_layout);
+            ret.place_lane(fx, 0).write_cvalue(fx, res_lane);
+        }
+
+        "llvm.x86.sse.sqrt.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps&ig_expand=6245
+            intrinsic_args!(fx, args => (a); intrinsic);
+
+            // FIXME use vector instructions when possible
+            simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
+                fx.bcx.ins().sqrt(lane)
+            });
+        }
+
+        "llvm.x86.sse.max.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps&ig_expand=4357
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| fx.bcx.ins().fmax(a_lane, b_lane),
+            );
+        }
+
+        "llvm.x86.sse.min.ps" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_ps&ig_expand=4489
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            simd_pair_for_each_lane(
+                fx,
+                a,
+                b,
+                ret,
+                &|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| fx.bcx.ins().fmin(a_lane, b_lane),
+            );
+        }
+
         "llvm.x86.sse.cmp.ps" | "llvm.x86.sse2.cmp.pd" => {
             let (x, y, kind) = match args {
                 [x, y, kind] => (x, y, kind),
@@ -1067,6 +1126,122 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             );
         }
 
+        "llvm.x86.sha1rnds4" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1rnds4_epu32&ig_expand=5877
+            intrinsic_args!(fx, args => (a, b, _func); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            let func = if let Some(func) =
+                crate::constant::mir_operand_get_const_val(fx, &args[2].node)
+            {
+                func
+            } else {
+                fx.tcx
+                    .dcx()
+                    .span_fatal(span, "Func argument for `_mm_sha1rnds4_epu32` is not a constant");
+            };
+
+            let func = func.try_to_u8().unwrap_or_else(|_| panic!("kind not scalar: {:?}", func));
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String(format!("sha1rnds4 xmm1, xmm2, {func}"))],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha1msg1" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg1_epu32&ig_expand=5874
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1msg1 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha1msg2" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1msg2_epu32&ig_expand=5875
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1msg2 xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
+        "llvm.x86.sha1nexte" => {
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha1nexte_epu32&ig_expand=5876
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let a = a.load_scalar(fx);
+            let b = b.load_scalar(fx);
+
+            codegen_inline_asm_inner(
+                fx,
+                &[InlineAsmTemplatePiece::String("sha1nexte xmm1, xmm2".to_string())],
+                &[
+                    CInlineAsmOperand::InOut {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm1)),
+                        _late: true,
+                        in_value: a,
+                        out_place: Some(ret),
+                    },
+                    CInlineAsmOperand::In {
+                        reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm2)),
+                        value: b,
+                    },
+                ],
+                InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
+            );
+        }
+
         "llvm.x86.sha256rnds2" => {
             // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sha256rnds2_epu32&ig_expand=5977
             intrinsic_args!(fx, args => (a, b, k); intrinsic);
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
index 9b8167fa2bf..8b86a116df1 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
@@ -391,12 +391,15 @@ fn codegen_float_intrinsic_call<'tcx>(
         | sym::ceilf32
         | sym::ceilf64
         | sym::truncf32
-        | sym::truncf64 => {
+        | sym::truncf64
+        | sym::sqrtf32
+        | sym::sqrtf64 => {
             let val = match intrinsic {
                 sym::fabsf32 | sym::fabsf64 => fx.bcx.ins().fabs(args[0]),
                 sym::floorf32 | sym::floorf64 => fx.bcx.ins().floor(args[0]),
                 sym::ceilf32 | sym::ceilf64 => fx.bcx.ins().ceil(args[0]),
                 sym::truncf32 | sym::truncf64 => fx.bcx.ins().trunc(args[0]),
+                sym::sqrtf32 | sym::sqrtf64 => fx.bcx.ins().sqrt(args[0]),
                 _ => unreachable!(),
             };
 
diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
index 8f662808522..79da970a58e 100644
--- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
+++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs
@@ -853,7 +853,13 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
         };
 
         for lane in 0..lane_count {
-            let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(lane) as i64);
+            // The bit order of the mask depends on the byte endianness, LSB-first for
+            // little endian and MSB-first for big endian.
+            let mask_lane = match fx.tcx.sess.target.endian {
+                Endian::Big => lane_count - 1 - lane,
+                Endian::Little => lane,
+            };
+            let m_lane = fx.bcx.ins().ushr_imm(m, u64::from(mask_lane) as i64);
             let m_lane = fx.bcx.ins().band_imm(m_lane, 1);
             let a_lane = a.value_lane(fx, lane).load_scalar(fx);
             let b_lane = b.value_lane(fx, lane).load_scalar(fx);
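A note on the `llvm.x86.sse.add.ss` lowering above: the handler writes `a` to the return place first and then overwrites lane 0 with the sum, matching the documented `_mm_add_ss` behavior. A usage sketch of the intrinsic from the user side (x86_64 only; `add_ss_demo` is an illustrative name):

```rust
#[cfg(target_arch = "x86_64")]
fn add_ss_demo() {
    use std::arch::x86_64::*;
    // SSE is baseline on x86_64, so no runtime feature check is needed.
    unsafe {
        // _mm_set_ps lists lanes high-to-low, so lane 0 holds 1.0 / 10.0.
        let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
        let b = _mm_set_ps(40.0, 30.0, 20.0, 10.0);
        let r = _mm_add_ss(a, b);
        let mut out = [0.0f32; 4];
        _mm_storeu_ps(out.as_mut_ptr(), r);
        // Only lane 0 is summed; the upper lanes pass through from `a`.
        assert_eq!(out, [11.0, 2.0, 3.0, 4.0]);
    }
}
```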
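The `llvm.x86.sha1rnds4` handler insists on a constant `func` because `sha1rnds4` encodes the round function as an instruction immediate. On the Rust side that immediate surfaces as a const generic, so a non-constant value cannot reach the backend through the public intrinsic. A usage sketch (`sha1_round_demo` is an illustrative name; requires a CPU with the SHA extension):

```rust
#[cfg(target_arch = "x86_64")]
fn sha1_round_demo(
    a: std::arch::x86_64::__m128i,
    b: std::arch::x86_64::__m128i,
) -> std::arch::x86_64::__m128i {
    assert!(is_x86_feature_detected!("sha"));
    // FUNC = 2 selects the majority round function; it must be a
    // compile-time constant, which is what the span_fatal above enforces.
    unsafe { std::arch::x86_64::_mm_sha1rnds4_epu32::<2>(a, b) }
}
```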
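With the intrinsics/mod.rs change, `sqrtf32` and `sqrtf64` are lowered inline to Cranelift's `sqrt` instruction, joining `fabs`/`floor`/`ceil`/`trunc`, instead of taking the generic fallback path (presumably a libcall before this change). Ordinary user code that benefits (illustrative only):

```rust
// Both of these now lower to a single Cranelift `sqrt` under cg_clif;
// nothing here is backend-specific.
fn hypot32(x: f32, y: f32) -> f32 {
    (x * x + y * y).sqrt()
}

fn hypot64(x: f64, y: f64) -> f64 {
    (x * x + y * y).sqrt()
}
```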
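The intrinsics/simd.rs fix makes the select-bitmask lane order endian-aware. A self-contained sketch of the lane-to-bit mapping that the new `match` implements (`mask_bit_for_lane` is a hypothetical helper):

```rust
// Which bit of the bitmask belongs to `lane`? LSB-first on little endian,
// MSB-first on big endian, mirroring the `match fx.tcx.sess.target.endian`.
fn mask_bit_for_lane(m: u64, lane: u32, lane_count: u32, big_endian: bool) -> u64 {
    let mask_lane = if big_endian { lane_count - 1 - lane } else { lane };
    (m >> mask_lane) & 1
}

fn main() {
    let m = 0b0000_0101; // lanes 0 and 2 selected, little-endian convention
    assert_eq!(mask_bit_for_lane(m, 0, 8, false), 1);
    // On big endian the same lane 0 reads the top bit instead.
    assert_eq!(mask_bit_for_lane(m, 0, 8, true), 0);
}
```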
