diff options
| author | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2024-08-09 17:18:46 +0000 |
|---|---|---|
| committer | bjorn3 <17426603+bjorn3@users.noreply.github.com> | 2024-08-09 17:18:46 +0000 |
| commit | af7ea3135deb4c2e4636d002b892c7ecf054a2e9 (patch) | |
| tree | 02b4c574195cf95b4ab1332bc12d159da0fd1170 /compiler/rustc_codegen_cranelift/src | |
| parent | 899eb03926be23f2e5d2ffcaa1d6f9ac40af7f13 (diff) | |
| parent | 69b3f5a426a5c1c05236a45b36f6679d95fbe01b (diff) | |
| download | rust-af7ea3135deb4c2e4636d002b892c7ecf054a2e9.tar.gz rust-af7ea3135deb4c2e4636d002b892c7ecf054a2e9.zip | |
Merge commit '69b3f5a426a5c1c05236a45b36f6679d95fbe01b' into sync_cg_clif-2024-08-09
Diffstat (limited to 'compiler/rustc_codegen_cranelift/src')
6 files changed, 96 insertions, 183 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/archive.rs b/compiler/rustc_codegen_cranelift/src/archive.rs index 084654af09d..5eedab4f2cb 100644 --- a/compiler/rustc_codegen_cranelift/src/archive.rs +++ b/compiler/rustc_codegen_cranelift/src/archive.rs @@ -1,8 +1,13 @@ +use std::borrow::Borrow; +use std::fs; use std::path::Path; +use ar_archive_writer::{COFFShortExport, MachineTypes}; use rustc_codegen_ssa::back::archive::{ - ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, DEFAULT_OBJECT_READER, + create_mingw_dll_import_lib, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, + DEFAULT_OBJECT_READER, }; +use rustc_codegen_ssa::common::is_mingw_gnu_toolchain; use rustc_session::Session; pub(crate) struct ArArchiveBuilderBuilder; @@ -15,10 +20,74 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { fn create_dll_import_lib( &self, sess: &Session, - _lib_name: &str, - _import_name_and_ordinal_vector: Vec<(String, Option<u16>)>, - _output_path: &Path, + lib_name: &str, + import_name_and_ordinal_vector: Vec<(String, Option<u16>)>, + output_path: &Path, ) { - sess.dcx().fatal("raw-dylib is not yet supported by rustc_codegen_cranelift"); + if is_mingw_gnu_toolchain(&sess.target) { + // The binutils linker used on -windows-gnu targets cannot read the import + // libraries generated by LLVM: in our attempts, the linker produced an .EXE + // that loaded but crashed with an AV upon calling one of the imported + // functions. Therefore, use binutils to create the import library instead, + // by writing a .DEF file to the temp dir and calling binutils's dlltool. + create_mingw_dll_import_lib( + sess, + lib_name, + import_name_and_ordinal_vector, + output_path, + ); + } else { + let mut file = + match fs::OpenOptions::new().write(true).create_new(true).open(&output_path) { + Ok(file) => file, + Err(error) => { + sess.dcx().fatal(format!( + "failed to create import library file `{path}`: {error}", + path = output_path.display(), + )); + } + }; + + let machine = match sess.target.arch.borrow() { + "x86" => MachineTypes::I386, + "x86_64" => MachineTypes::AMD64, + "arm" => MachineTypes::ARMNT, + "aarch64" => MachineTypes::ARM64, + _ => { + sess.dcx().fatal(format!( + "unsupported target architecture `{arch}`", + arch = sess.target.arch, + )); + } + }; + + let exports = import_name_and_ordinal_vector + .iter() + .map(|(name, ordinal)| COFFShortExport { + name: name.to_string(), + ext_name: None, + symbol_name: None, + alias_target: None, + ordinal: ordinal.unwrap_or(0), + noname: ordinal.is_some(), + data: false, + private: false, + constant: false, + }) + .collect::<Vec<_>>(); + + if let Err(error) = ar_archive_writer::write_import_library( + &mut file, + lib_name, + &exports, + machine, + !sess.target.is_like_msvc, + ) { + sess.dcx().fatal(format!( + "failed to create import library `{path}`: `{error}`", + path = output_path.display(), + )); + } + } } } diff --git a/compiler/rustc_codegen_cranelift/src/codegen_i128.rs b/compiler/rustc_codegen_cranelift/src/codegen_i128.rs index e16b77648d1..b6a4769e031 100644 --- a/compiler/rustc_codegen_cranelift/src/codegen_i128.rs +++ b/compiler/rustc_codegen_cranelift/src/codegen_i128.rs @@ -23,19 +23,7 @@ pub(crate) fn maybe_codegen<'tcx>( match bin_op { BinOp::BitAnd | BinOp::BitOr | BinOp::BitXor => None, BinOp::Add | BinOp::AddUnchecked | BinOp::Sub | BinOp::SubUnchecked => None, - BinOp::Mul | BinOp::MulUnchecked => { - let args = [lhs.load_scalar(fx), rhs.load_scalar(fx)]; - let ret_val = fx.lib_call( - "__multi3", - vec![AbiParam::new(types::I128), AbiParam::new(types::I128)], - vec![AbiParam::new(types::I128)], - &args, - )[0]; - Some(CValue::by_val( - ret_val, - fx.layout_of(if is_signed { fx.tcx.types.i128 } else { fx.tcx.types.u128 }), - )) - } + BinOp::Mul | BinOp::MulUnchecked => None, BinOp::Offset => unreachable!("offset should only be used on pointers, not 128bit ints"), BinOp::Div | BinOp::Rem => { let name = match (bin_op, is_signed) { @@ -92,6 +80,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>( match bin_op { BinOp::BitAnd | BinOp::BitOr | BinOp::BitXor => unreachable!(), + BinOp::Add | BinOp::Sub => None, BinOp::Mul if is_signed => { let out_ty = Ty::new_tup(fx.tcx, &[lhs.layout().ty, fx.tcx.types.bool]); let oflow = CPlace::new_stack_slot(fx, fx.layout_of(fx.tcx.types.i32)); @@ -112,7 +101,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>( let oflow = fx.bcx.ins().ireduce(types::I8, oflow); Some(CValue::by_val_pair(res, oflow, fx.layout_of(out_ty))) } - BinOp::Add | BinOp::Sub | BinOp::Mul => { + BinOp::Mul => { let out_ty = Ty::new_tup(fx.tcx, &[lhs.layout().ty, fx.tcx.types.bool]); let out_place = CPlace::new_stack_slot(fx, fx.layout_of(out_ty)); let param_types = vec![ @@ -121,15 +110,7 @@ pub(crate) fn maybe_codegen_checked<'tcx>( AbiParam::new(types::I128), ]; let args = [out_place.to_ptr().get_addr(fx), lhs.load_scalar(fx), rhs.load_scalar(fx)]; - let name = match (bin_op, is_signed) { - (BinOp::Add, false) => "__rust_u128_addo", - (BinOp::Add, true) => "__rust_i128_addo", - (BinOp::Sub, false) => "__rust_u128_subo", - (BinOp::Sub, true) => "__rust_i128_subo", - (BinOp::Mul, false) => "__rust_u128_mulo", - _ => unreachable!(), - }; - fx.lib_call(name, param_types, vec![], &args); + fx.lib_call("__rust_u128_mulo", param_types, vec![], &args); Some(out_place.to_cvalue(fx)) } BinOp::AddUnchecked | BinOp::SubUnchecked | BinOp::MulUnchecked => unreachable!(), diff --git a/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs b/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs index f3b963200a0..4154a62234c 100644 --- a/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs +++ b/compiler/rustc_codegen_cranelift/src/compiler_builtins.rs @@ -38,18 +38,12 @@ builtin_functions! { register_functions_for_jit; // integers - fn __multi3(a: i128, b: i128) -> i128; fn __muloti4(n: i128, d: i128, oflow: &mut i32) -> i128; fn __udivti3(n: u128, d: u128) -> u128; fn __divti3(n: i128, d: i128) -> i128; fn __umodti3(n: u128, d: u128) -> u128; fn __modti3(n: i128, d: i128) -> i128; - fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool); - fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool); - fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool); - fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool); fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool); - fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool); // floats fn __floattisf(i: i128) -> f32; diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs index a20faa2cad3..cb003037c26 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/llvm_x86.rs @@ -169,39 +169,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( } } - "llvm.x86.sse.add.ss" => { - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_ss&ig_expand=171 - intrinsic_args!(fx, args => (a, b); intrinsic); - - assert_eq!(a.layout(), b.layout()); - assert_eq!(a.layout(), ret.layout()); - let layout = a.layout(); - - let (_, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); - assert!(lane_ty.is_floating_point()); - let ret_lane_layout = fx.layout_of(lane_ty); - - ret.write_cvalue(fx, a); - - let a_lane = a.value_lane(fx, 0).load_scalar(fx); - let b_lane = b.value_lane(fx, 0).load_scalar(fx); - - let res = fx.bcx.ins().fadd(a_lane, b_lane); - - let res_lane = CValue::by_val(res, ret_lane_layout); - ret.place_lane(fx, 0).write_cvalue(fx, res_lane); - } - - "llvm.x86.sse.sqrt.ps" => { - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sqrt_ps&ig_expand=6245 - intrinsic_args!(fx, args => (a); intrinsic); - - // FIXME use vector instructions when possible - simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, lane| { - fx.bcx.ins().sqrt(lane) - }); - } - "llvm.x86.sse.max.ps" => { // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_ps&ig_expand=4357 intrinsic_args!(fx, args => (a, b); intrinsic); @@ -744,117 +711,6 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>( pack_instruction(fx, a, b, ret, PackSize::S16, PackWidth::Avx); } - "llvm.x86.fma.vfmaddsub.ps" - | "llvm.x86.fma.vfmaddsub.pd" - | "llvm.x86.fma.vfmaddsub.ps.256" - | "llvm.x86.fma.vfmaddsub.pd.256" => { - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205 - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181 - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209 - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185 - intrinsic_args!(fx, args => (a, b, c); intrinsic); - - assert_eq!(a.layout(), b.layout()); - assert_eq!(a.layout(), c.layout()); - let layout = a.layout(); - - let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); - let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); - assert!(lane_ty.is_floating_point()); - assert!(ret_lane_ty.is_floating_point()); - assert_eq!(lane_count, ret_lane_count); - let ret_lane_layout = fx.layout_of(ret_lane_ty); - - for idx in 0..lane_count { - let a_lane = a.value_lane(fx, idx).load_scalar(fx); - let b_lane = b.value_lane(fx, idx).load_scalar(fx); - let c_lane = c.value_lane(fx, idx).load_scalar(fx); - - let mul = fx.bcx.ins().fmul(a_lane, b_lane); - let res = if idx & 1 == 0 { - fx.bcx.ins().fsub(mul, c_lane) - } else { - fx.bcx.ins().fadd(mul, c_lane) - }; - - let res_lane = CValue::by_val(res, ret_lane_layout); - ret.place_lane(fx, idx).write_cvalue(fx, res_lane); - } - } - - "llvm.x86.fma.vfmsubadd.ps" - | "llvm.x86.fma.vfmsubadd.pd" - | "llvm.x86.fma.vfmsubadd.ps.256" - | "llvm.x86.fma.vfmsubadd.pd.256" => { - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325 - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301 - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329 - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305 - intrinsic_args!(fx, args => (a, b, c); intrinsic); - - assert_eq!(a.layout(), b.layout()); - assert_eq!(a.layout(), c.layout()); - let layout = a.layout(); - - let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); - let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); - assert!(lane_ty.is_floating_point()); - assert!(ret_lane_ty.is_floating_point()); - assert_eq!(lane_count, ret_lane_count); - let ret_lane_layout = fx.layout_of(ret_lane_ty); - - for idx in 0..lane_count { - let a_lane = a.value_lane(fx, idx).load_scalar(fx); - let b_lane = b.value_lane(fx, idx).load_scalar(fx); - let c_lane = c.value_lane(fx, idx).load_scalar(fx); - - let mul = fx.bcx.ins().fmul(a_lane, b_lane); - let res = if idx & 1 == 0 { - fx.bcx.ins().fadd(mul, c_lane) - } else { - fx.bcx.ins().fsub(mul, c_lane) - }; - - let res_lane = CValue::by_val(res, ret_lane_layout); - ret.place_lane(fx, idx).write_cvalue(fx, res_lane); - } - } - - "llvm.x86.fma.vfnmadd.ps" - | "llvm.x86.fma.vfnmadd.pd" - | "llvm.x86.fma.vfnmadd.ps.256" - | "llvm.x86.fma.vfnmadd.pd.256" => { - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391 - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367 - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395 - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371 - intrinsic_args!(fx, args => (a, b, c); intrinsic); - - assert_eq!(a.layout(), b.layout()); - assert_eq!(a.layout(), c.layout()); - let layout = a.layout(); - - let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx); - let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx); - assert!(lane_ty.is_floating_point()); - assert!(ret_lane_ty.is_floating_point()); - assert_eq!(lane_count, ret_lane_count); - let ret_lane_layout = fx.layout_of(ret_lane_ty); - - for idx in 0..lane_count { - let a_lane = a.value_lane(fx, idx).load_scalar(fx); - let b_lane = b.value_lane(fx, idx).load_scalar(fx); - let c_lane = c.value_lane(fx, idx).load_scalar(fx); - - let mul = fx.bcx.ins().fmul(a_lane, b_lane); - let neg_mul = fx.bcx.ins().fneg(mul); - let res = fx.bcx.ins().fadd(neg_mul, c_lane); - - let res_lane = CValue::by_val(res, ret_lane_layout); - ret.place_lane(fx, idx).write_cvalue(fx, res_lane); - } - } - "llvm.x86.sse42.crc32.32.8" | "llvm.x86.sse42.crc32.32.16" | "llvm.x86.sse42.crc32.32.32" diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs index 9d46d8d6dac..f737af25b62 100644 --- a/compiler/rustc_codegen_cranelift/src/lib.rs +++ b/compiler/rustc_codegen_cranelift/src/lib.rs @@ -190,9 +190,20 @@ impl CodegenBackend for CraneliftCodegenBackend { if sess.target.arch == "x86_64" && sess.target.os != "none" { // x86_64 mandates SSE2 support vec![Symbol::intern("fxsr"), sym::sse, Symbol::intern("sse2")] - } else if sess.target.arch == "aarch64" && sess.target.os != "none" { - // AArch64 mandates Neon support - vec![sym::neon] + } else if sess.target.arch == "aarch64" { + match &*sess.target.os { + "none" => vec![], + // On macOS the aes, sha2 and sha3 features are enabled by default and ring + // fails to compile on macOS when they are not present. + "macos" => vec![ + sym::neon, + Symbol::intern("aes"), + Symbol::intern("sha2"), + Symbol::intern("sha3"), + ], + // AArch64 mandates Neon support + _ => vec![sym::neon], + } } else { vec![] } diff --git a/compiler/rustc_codegen_cranelift/src/value_and_place.rs b/compiler/rustc_codegen_cranelift/src/value_and_place.rs index 1aa28daeafc..8eb2095e523 100644 --- a/compiler/rustc_codegen_cranelift/src/value_and_place.rs +++ b/compiler/rustc_codegen_cranelift/src/value_and_place.rs @@ -677,8 +677,10 @@ impl<'tcx> CPlace<'tcx> { let to_addr = to_ptr.get_addr(fx); let src_layout = from.1; let size = dst_layout.size.bytes(); - let src_align = src_layout.align.abi.bytes() as u8; - let dst_align = dst_layout.align.abi.bytes() as u8; + // `emit_small_memory_copy` uses `u8` for alignments, just use the maximum + // alignment that fits in a `u8` if the actual alignment is larger. + let src_align = src_layout.align.abi.bytes().try_into().unwrap_or(128); + let dst_align = dst_layout.align.abi.bytes().try_into().unwrap_or(128); fx.bcx.emit_small_memory_copy( fx.target_config, to_addr, |
