| author | Antoni Boucher <bouanto@zoho.com> | 2023-10-09 15:53:34 -0400 |
|---|---|---|
| committer | Antoni Boucher <bouanto@zoho.com> | 2023-10-09 15:53:34 -0400 |
| commit | 30290c8b411a837b2e19293391956f5f779608ab | |
| tree | 0a59bbb2fda11bca09f135f56ea3025c5daea590 /compiler/rustc_codegen_gcc/src | |
| parent | cdddcd3bea35049dab56888d4391cb9b5b1b4491 | |
| parent | 11a0cceab966e5ff1058ddbcab5977e8a1d6d290 | |
Merge commit '11a0cceab966e5ff1058ddbcab5977e8a1d6d290' into subtree-update_cg_gcc_2023-10-09
Diffstat (limited to 'compiler/rustc_codegen_gcc/src')
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/abi.rs | 44 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/allocator.rs | 109 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/asm.rs | 4 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/attributes.rs | 102 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/back/lto.rs | 341 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/back/mod.rs | 1 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/back/write.rs | 102 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/base.rs | 71 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/builder.rs | 29 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/declare.rs | 40 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/errors.rs | 87 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/gcc_util.rs | 223 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/int.rs | 81 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/intrinsic/archs.rs | 114 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs | 20 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/intrinsic/mod.rs | 4 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/lib.rs | 184 |
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/type_of.rs | 10 |
18 files changed, 1275 insertions, 291 deletions
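The most instructive change in the abi.rs diff below is a new `apply_attrs` helper that maps Rust's `NoAlias` argument attribute onto GCC's `restrict` type qualifier, but only when optimizations are enabled. The following is a minimal, self-contained sketch of just that control flow; `OptLevel`, `ArgAttributes`, and `Type` here are simplified stand-ins for the real `rustc_session`/`rustc_target`/`gccjit` types, not their actual APIs:

```rust
// Stand-in for rustc_session::config::OptLevel (only two levels modeled).
#[derive(Clone, Copy, PartialEq)]
enum OptLevel { No, Default }

// Stand-in for rustc_target's ArgAttributes; the real type is a bitset
// in which NoAlias is one flag.
#[derive(Clone, Copy)]
struct ArgAttributes { no_alias: bool }

// Stand-in for a gccjit type handle.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Type { restrict: bool }

impl Type {
    // Models gccjit's `make_restrict`, which qualifies a pointer type
    // with `restrict` so GCC may assume it does not alias.
    fn make_restrict(self) -> Type { Type { restrict: true } }
}

// Mirrors the closure added in abi.rs: apply `restrict` only when
// optimizing and the argument carries NoAlias (e.g. a `&mut T` argument).
fn apply_attrs(opt_level: OptLevel, ty: Type, attrs: &ArgAttributes) -> Type {
    if opt_level != OptLevel::No && attrs.no_alias {
        ty.make_restrict()
    } else {
        ty
    }
}

fn main() {
    let ptr = Type { restrict: false };
    assert_eq!(
        apply_attrs(OptLevel::Default, ptr, &ArgAttributes { no_alias: true }),
        Type { restrict: true }
    );
    assert_eq!(
        apply_attrs(OptLevel::No, ptr, &ArgAttributes { no_alias: true }),
        Type { restrict: false }
    );
    println!("restrict is applied only under optimization");
}
```

In the actual diff this closure exists in two variants: the one above guarded by `#[cfg(feature = "master")]`, and a no-op fallback for builds against libgccjit releases that lack `make_restrict`. Note also that "byval" (on-stack indirect) arguments deliberately skip the qualifier.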
diff --git a/compiler/rustc_codegen_gcc/src/abi.rs b/compiler/rustc_codegen_gcc/src/abi.rs index a49530ebb4c..35bb0b6e5f4 100644 --- a/compiler/rustc_codegen_gcc/src/abi.rs +++ b/compiler/rustc_codegen_gcc/src/abi.rs @@ -3,7 +3,9 @@ use rustc_codegen_ssa::traits::{AbiBuilderMethods, BaseTypeMethods}; use rustc_data_structures::fx::FxHashSet; use rustc_middle::bug; use rustc_middle::ty::Ty; -use rustc_target::abi::call::{CastTarget, FnAbi, PassMode, Reg, RegKind}; +#[cfg(feature = "master")] +use rustc_session::config; +use rustc_target::abi::call::{ArgAttributes, CastTarget, FnAbi, PassMode, Reg, RegKind}; use crate::builder::Builder; use crate::context::CodegenCx; @@ -120,30 +122,50 @@ impl<'gcc, 'tcx> FnAbiGccExt<'gcc, 'tcx> for FnAbi<'tcx, Ty<'tcx>> { } }; + #[cfg(feature = "master")] + let apply_attrs = |ty: Type<'gcc>, attrs: &ArgAttributes| { + if cx.sess().opts.optimize != config::OptLevel::No + && attrs.regular.contains(rustc_target::abi::call::ArgAttribute::NoAlias) + { + ty.make_restrict() + } else { + ty + } + }; + #[cfg(not(feature = "master"))] + let apply_attrs = |ty: Type<'gcc>, _attrs: &ArgAttributes| { + ty + }; + for arg in self.args.iter() { let arg_ty = match arg.mode { PassMode::Ignore => continue, - PassMode::Direct(_) => arg.layout.immediate_gcc_type(cx), - PassMode::Pair(..) => { - argument_tys.push(arg.layout.scalar_pair_element_gcc_type(cx, 0)); - argument_tys.push(arg.layout.scalar_pair_element_gcc_type(cx, 1)); + PassMode::Pair(a, b) => { + argument_tys.push(apply_attrs(arg.layout.scalar_pair_element_gcc_type(cx, 0), &a)); + argument_tys.push(apply_attrs(arg.layout.scalar_pair_element_gcc_type(cx, 1), &b)); continue; } - PassMode::Indirect { meta_attrs: Some(_), .. } => { - unimplemented!(); - } PassMode::Cast { ref cast, pad_i32 } => { // add padding if pad_i32 { argument_tys.push(Reg::i32().gcc_type(cx)); } - cast.gcc_type(cx) + let ty = cast.gcc_type(cx); + apply_attrs(ty, &cast.attrs) } - PassMode::Indirect { meta_attrs: None, on_stack: true, .. } => { + PassMode::Indirect { attrs: _, meta_attrs: None, on_stack: true } => { + // This is a "byval" argument, so we don't apply the `restrict` attribute on it. on_stack_param_indices.insert(argument_tys.len()); arg.memory_ty(cx) }, - PassMode::Indirect { meta_attrs: None, on_stack: false, .. 
} => cx.type_ptr_to(arg.memory_ty(cx)), + PassMode::Direct(attrs) => apply_attrs(arg.layout.immediate_gcc_type(cx), &attrs), + PassMode::Indirect { attrs, meta_attrs: None, on_stack: false } => { + apply_attrs(cx.type_ptr_to(arg.memory_ty(cx)), &attrs) + } + PassMode::Indirect { attrs, meta_attrs: Some(meta_attrs), on_stack } => { + assert!(!on_stack); + apply_attrs(apply_attrs(cx.type_ptr_to(arg.memory_ty(cx)), &attrs), &meta_attrs) + } }; argument_tys.push(arg_ty); } diff --git a/compiler/rustc_codegen_gcc/src/allocator.rs b/compiler/rustc_codegen_gcc/src/allocator.rs index edd7ab722f6..c8c098e2973 100644 --- a/compiler/rustc_codegen_gcc/src/allocator.rs +++ b/compiler/rustc_codegen_gcc/src/allocator.rs @@ -1,6 +1,6 @@ #[cfg(feature="master")] use gccjit::FnAttribute; -use gccjit::{FunctionType, GlobalKind, ToRValue}; +use gccjit::{Context, FunctionType, GlobalKind, ToRValue, Type}; use rustc_ast::expand::allocator::{ alloc_error_handler_name, default_fn_name, global_fn_name, AllocatorKind, AllocatorTy, ALLOCATOR_METHODS, NO_ALLOC_SHIM_IS_UNSTABLE, @@ -22,7 +22,6 @@ pub(crate) unsafe fn codegen(tcx: TyCtxt<'_>, mods: &mut GccContext, _module_nam }; let i8 = context.new_type::<i8>(); let i8p = i8.make_pointer(); - let void = context.new_type::<()>(); if kind == AllocatorKind::Default { for method in ALLOCATOR_METHODS { @@ -47,67 +46,62 @@ pub(crate) unsafe fn codegen(tcx: TyCtxt<'_>, mods: &mut GccContext, _module_nam panic!("invalid allocator output") } }; - let name = global_fn_name(method.name); + let from_name = global_fn_name(method.name); + let to_name = default_fn_name(method.name); - let args: Vec<_> = types.iter().enumerate() - .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index))) - .collect(); - let func = context.new_function(None, FunctionType::Exported, output.unwrap_or(void), &args, name, false); + create_wrapper_function(tcx, context, &from_name, &to_name, &types, output); + } + } - if tcx.sess.target.options.default_hidden_visibility { - #[cfg(feature="master")] - func.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden)); - } - if tcx.sess.must_emit_unwind_tables() { - // TODO(antoyo): emit unwind tables. 
- } + // FIXME(bjorn3): Add noreturn attribute + create_wrapper_function( + tcx, + context, + "__rust_alloc_error_handler", + &alloc_error_handler_name(alloc_error_handler_kind), + &[usize, usize], + None, + ); - let callee = default_fn_name(method.name); - let args: Vec<_> = types.iter().enumerate() - .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index))) - .collect(); - let callee = context.new_function(None, FunctionType::Extern, output.unwrap_or(void), &args, callee, false); - #[cfg(feature="master")] - callee.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden)); - - let block = func.new_block("entry"); - - let args = args - .iter() - .enumerate() - .map(|(i, _)| func.get_param(i as i32).to_rvalue()) - .collect::<Vec<_>>(); - let ret = context.new_call(None, callee, &args); - //llvm::LLVMSetTailCall(ret, True); - if output.is_some() { - block.end_with_return(None, ret); - } - else { - block.end_with_void_return(None); - } + let name = OomStrategy::SYMBOL.to_string(); + let global = context.new_global(None, GlobalKind::Exported, i8, name); + let value = tcx.sess.opts.unstable_opts.oom.should_panic(); + let value = context.new_rvalue_from_int(i8, value as i32); + global.global_set_initializer_rvalue(value); - // TODO(@Commeownist): Check if we need to emit some extra debugging info in certain circumstances - // as described in https://github.com/rust-lang/rust/commit/77a96ed5646f7c3ee8897693decc4626fe380643 - } - } + let name = NO_ALLOC_SHIM_IS_UNSTABLE.to_string(); + let global = context.new_global(None, GlobalKind::Exported, i8, name); + let value = context.new_rvalue_from_int(i8, 0); + global.global_set_initializer_rvalue(value); +} + +fn create_wrapper_function( + tcx: TyCtxt<'_>, + context: &Context<'_>, + from_name: &str, + to_name: &str, + types: &[Type<'_>], + output: Option<Type<'_>>, +) { + let void = context.new_type::<()>(); - let types = [usize, usize]; - let name = "__rust_alloc_error_handler".to_string(); let args: Vec<_> = types.iter().enumerate() .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index))) .collect(); - let func = context.new_function(None, FunctionType::Exported, void, &args, name, false); + let func = context.new_function(None, FunctionType::Exported, output.unwrap_or(void), &args, from_name, false); - if tcx.sess.target.default_hidden_visibility { + if tcx.sess.target.options.default_hidden_visibility { #[cfg(feature="master")] func.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden)); } + if tcx.sess.must_emit_unwind_tables() { + // TODO(antoyo): emit unwind tables. 
+ } - let callee = alloc_error_handler_name(alloc_error_handler_kind); let args: Vec<_> = types.iter().enumerate() .map(|(index, typ)| context.new_parameter(None, *typ, &format!("param{}", index))) .collect(); - let callee = context.new_function(None, FunctionType::Extern, void, &args, callee, false); + let callee = context.new_function(None, FunctionType::Extern, output.unwrap_or(void), &args, to_name, false); #[cfg(feature="master")] callee.add_attribute(FnAttribute::Visibility(gccjit::Visibility::Hidden)); @@ -118,18 +112,15 @@ pub(crate) unsafe fn codegen(tcx: TyCtxt<'_>, mods: &mut GccContext, _module_nam .enumerate() .map(|(i, _)| func.get_param(i as i32).to_rvalue()) .collect::<Vec<_>>(); - let _ret = context.new_call(None, callee, &args); + let ret = context.new_call(None, callee, &args); //llvm::LLVMSetTailCall(ret, True); - block.end_with_void_return(None); - - let name = OomStrategy::SYMBOL.to_string(); - let global = context.new_global(None, GlobalKind::Exported, i8, name); - let value = tcx.sess.opts.unstable_opts.oom.should_panic(); - let value = context.new_rvalue_from_int(i8, value as i32); - global.global_set_initializer_rvalue(value); + if output.is_some() { + block.end_with_return(None, ret); + } + else { + block.end_with_void_return(None); + } - let name = NO_ALLOC_SHIM_IS_UNSTABLE.to_string(); - let global = context.new_global(None, GlobalKind::Exported, i8, name); - let value = context.new_rvalue_from_int(i8, 0); - global.global_set_initializer_rvalue(value); + // TODO(@Commeownist): Check if we need to emit some extra debugging info in certain circumstances + // as described in https://github.com/rust-lang/rust/commit/77a96ed5646f7c3ee8897693decc4626fe380643 } diff --git a/compiler/rustc_codegen_gcc/src/asm.rs b/compiler/rustc_codegen_gcc/src/asm.rs index 905fdac92e9..f3a9ca77a67 100644 --- a/compiler/rustc_codegen_gcc/src/asm.rs +++ b/compiler/rustc_codegen_gcc/src/asm.rs @@ -452,10 +452,6 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { } InlineAsmOperandRef::Const { ref string } => { - // Const operands get injected directly into the template - if att_dialect { - template_str.push('$'); - } template_str.push_str(string); } } diff --git a/compiler/rustc_codegen_gcc/src/attributes.rs b/compiler/rustc_codegen_gcc/src/attributes.rs index eb0cce19b85..971e019a4f6 100644 --- a/compiler/rustc_codegen_gcc/src/attributes.rs +++ b/compiler/rustc_codegen_gcc/src/attributes.rs @@ -4,72 +4,13 @@ use gccjit::Function; use rustc_attr::InstructionSetAttr; #[cfg(feature="master")] use rustc_attr::InlineAttr; -use rustc_codegen_ssa::target_features::tied_target_features; -use rustc_data_structures::fx::FxHashMap; use rustc_middle::ty; #[cfg(feature="master")] use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; -use rustc_session::Session; use rustc_span::symbol::sym; -use smallvec::{smallvec, SmallVec}; use crate::{context::CodegenCx, errors::TiedTargetFeatures}; - -// Given a map from target_features to whether they are enabled or disabled, -// ensure only valid combinations are allowed. 
-pub fn check_tied_features(sess: &Session, features: &FxHashMap<&str, bool>) -> Option<&'static [&'static str]> { - for tied in tied_target_features(sess) { - // Tied features must be set to the same value, or not set at all - let mut tied_iter = tied.iter(); - let enabled = features.get(tied_iter.next().unwrap()); - if tied_iter.any(|feature| enabled != features.get(feature)) { - return Some(tied); - } - } - None -} - -// TODO(antoyo): maybe move to a new module gcc_util. -// To find a list of GCC's names, check https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html -fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> { - let arch = if sess.target.arch == "x86_64" { "x86" } else { &*sess.target.arch }; - match (arch, s) { - ("x86", "sse4.2") => smallvec!["sse4.2", "crc32"], - ("x86", "pclmulqdq") => smallvec!["pclmul"], - ("x86", "rdrand") => smallvec!["rdrnd"], - ("x86", "bmi1") => smallvec!["bmi"], - ("x86", "cmpxchg16b") => smallvec!["cx16"], - ("x86", "avx512vaes") => smallvec!["vaes"], - ("x86", "avx512gfni") => smallvec!["gfni"], - ("x86", "avx512vpclmulqdq") => smallvec!["vpclmulqdq"], - // NOTE: seems like GCC requires 'avx512bw' for 'avx512vbmi2'. - ("x86", "avx512vbmi2") => smallvec!["avx512vbmi2", "avx512bw"], - // NOTE: seems like GCC requires 'avx512bw' for 'avx512bitalg'. - ("x86", "avx512bitalg") => smallvec!["avx512bitalg", "avx512bw"], - ("aarch64", "rcpc2") => smallvec!["rcpc-immo"], - ("aarch64", "dpb") => smallvec!["ccpp"], - ("aarch64", "dpb2") => smallvec!["ccdp"], - ("aarch64", "frintts") => smallvec!["fptoint"], - ("aarch64", "fcma") => smallvec!["complxnum"], - ("aarch64", "pmuv3") => smallvec!["perfmon"], - ("aarch64", "paca") => smallvec!["pauth"], - ("aarch64", "pacg") => smallvec!["pauth"], - // Rust ties fp and neon together. In LLVM neon implicitly enables fp, - // but we manually enable neon when a feature only implicitly enables fp - ("aarch64", "f32mm") => smallvec!["f32mm", "neon"], - ("aarch64", "f64mm") => smallvec!["f64mm", "neon"], - ("aarch64", "fhm") => smallvec!["fp16fml", "neon"], - ("aarch64", "fp16") => smallvec!["fullfp16", "neon"], - ("aarch64", "jsconv") => smallvec!["jsconv", "neon"], - ("aarch64", "sve") => smallvec!["sve", "neon"], - ("aarch64", "sve2") => smallvec!["sve2", "neon"], - ("aarch64", "sve2-aes") => smallvec!["sve2-aes", "neon"], - ("aarch64", "sve2-sm4") => smallvec!["sve2-sm4", "neon"], - ("aarch64", "sve2-sha3") => smallvec!["sve2-sha3", "neon"], - ("aarch64", "sve2-bitperm") => smallvec!["sve2-bitperm", "neon"], - (_, s) => smallvec![s], - } -} +use crate::gcc_util::{check_tied_features, to_gcc_features}; /// Get GCC attribute for the provided inline heuristic. #[cfg(feature="master")] @@ -114,6 +55,19 @@ pub fn from_fn_attrs<'gcc, 'tcx>( if let Some(attr) = inline_attr(cx, inline) { func.add_attribute(attr); } + + if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) { + func.add_attribute(FnAttribute::Cold); + } + if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_RETURNS_TWICE) { + func.add_attribute(FnAttribute::ReturnsTwice); + } + if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_PURE) { + func.add_attribute(FnAttribute::Pure); + } + if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_CONST) { + func.add_attribute(FnAttribute::Const); + } } let function_features = @@ -140,11 +94,33 @@ pub fn from_fn_attrs<'gcc, 'tcx>( })) .collect::<Vec<_>>(); - // TODO(antoyo): check if we really need global backend features. (Maybe they could be applied - // globally?) 
+ // TODO(antoyo): cg_llvm adds global features to each function so that LTO keep them. + // Check if GCC requires the same. let mut global_features = cx.tcx.global_backend_features(()).iter().map(|s| s.as_str()); function_features.extend(&mut global_features); - let target_features = function_features.join(","); + let target_features = function_features + .iter() + .filter_map(|feature| { + // FIXME(antoyo): for some reasons, disabling SSE results in the following error when + // compiling Rust for Linux: + // SSE register return with SSE disabled + // TODO(antoyo): support soft-float and retpoline-external-thunk. + if feature.contains("soft-float") || feature.contains("retpoline-external-thunk") || *feature == "-sse" { + return None; + } + + if feature.starts_with('-') { + Some(format!("no{}", feature)) + } + else if feature.starts_with('+') { + Some(feature[1..].to_string()) + } + else { + Some(feature.to_string()) + } + }) + .collect::<Vec<_>>() + .join(","); if !target_features.is_empty() { #[cfg(feature="master")] func.add_attribute(FnAttribute::Target(&target_features)); diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs new file mode 100644 index 00000000000..529454b119e --- /dev/null +++ b/compiler/rustc_codegen_gcc/src/back/lto.rs @@ -0,0 +1,341 @@ +/// GCC requires to use the same toolchain for the whole compilation when doing LTO. +/// So, we need the same version/commit of the linker (gcc) and lto front-end binaries (lto1, +/// lto-wrapper, liblto_plugin.so). + +// FIXME(antoyo): the executables compiled with LTO are bigger than those compiled without LTO. +// Since it is the opposite for cg_llvm, check if this is normal. +// +// Maybe we embed the bitcode in the final binary? +// It doesn't look like we try to generate fat objects for the final binary. +// Check if the way we combine the object files make it keep the LTO sections on the final link. +// Maybe that's because the combined object files contain the IR (true) and the final link +// does not remove it? +// +// TODO(antoyo): for performance, check which optimizations the C++ frontend enables. 
+// +// Fix these warnings: +// /usr/bin/ld: warning: type of symbol `_RNvNvNvNtCs5JWOrf9uCus_5rayon11thread_pool19WORKER_THREAD_STATE7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o +// /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o +// /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization + +use std::ffi::CString; +use std::fs::{self, File}; +use std::path::{Path, PathBuf}; + +use gccjit::OutputKind; +use object::read::archive::ArchiveFile; +use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule}; +use rustc_codegen_ssa::back::symbol_export; +use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput}; +use rustc_codegen_ssa::traits::*; +use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind}; +use rustc_data_structures::memmap::Mmap; +use rustc_errors::{FatalError, Handler}; +use rustc_hir::def_id::LOCAL_CRATE; +use rustc_middle::dep_graph::WorkProduct; +use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel}; +use rustc_session::config::{CrateType, Lto}; +use tempfile::{TempDir, tempdir}; + +use crate::back::write::save_temp_bitcode; +use crate::errors::{ + DynamicLinkingWithLTO, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib, +}; +use crate::{GccCodegenBackend, GccContext, to_gcc_opt_level}; + +/// We keep track of the computed LTO cache keys from the previous +/// session to determine which CGUs we can reuse. +//pub const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin"; + +pub fn crate_type_allows_lto(crate_type: CrateType) -> bool { + match crate_type { + CrateType::Executable | CrateType::Dylib | CrateType::Staticlib | CrateType::Cdylib => true, + CrateType::Rlib | CrateType::ProcMacro => false, + } +} + +struct LtoData { + // TODO(antoyo): use symbols_below_threshold. 
+ //symbols_below_threshold: Vec<CString>, + upstream_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>, + tmp_path: TempDir, +} + +fn prepare_lto(cgcx: &CodegenContext<GccCodegenBackend>, diag_handler: &Handler) -> Result<LtoData, FatalError> { + let export_threshold = match cgcx.lto { + // We're just doing LTO for our one crate + Lto::ThinLocal => SymbolExportLevel::Rust, + + // We're doing LTO for the entire crate graph + Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types), + + Lto::No => panic!("didn't request LTO but we're doing LTO"), + }; + + let tmp_path = + match tempdir() { + Ok(tmp_path) => tmp_path, + Err(error) => { + eprintln!("Cannot create temporary directory: {}", error); + return Err(FatalError); + }, + }; + + let symbol_filter = &|&(ref name, info): &(String, SymbolExportInfo)| { + if info.level.is_below_threshold(export_threshold) || info.used { + Some(CString::new(name.as_str()).unwrap()) + } else { + None + } + }; + let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO"); + let mut symbols_below_threshold = { + let _timer = cgcx.prof.generic_activity("GCC_lto_generate_symbols_below_threshold"); + exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::<Vec<CString>>() + }; + info!("{} symbols to preserve in this crate", symbols_below_threshold.len()); + + // If we're performing LTO for the entire crate graph, then for each of our + // upstream dependencies, find the corresponding rlib and load the bitcode + // from the archive. + // + // We save off all the bytecode and GCC module file path for later processing + // with either fat or thin LTO + let mut upstream_modules = Vec::new(); + if cgcx.lto != Lto::ThinLocal { + // Make sure we actually can run LTO + for crate_type in cgcx.crate_types.iter() { + if !crate_type_allows_lto(*crate_type) { + diag_handler.emit_err(LtoDisallowed); + return Err(FatalError); + } else if *crate_type == CrateType::Dylib { + if !cgcx.opts.unstable_opts.dylib_lto { + diag_handler.emit_err(LtoDylib); + return Err(FatalError); + } + } + } + + if cgcx.opts.cg.prefer_dynamic && !cgcx.opts.unstable_opts.dylib_lto { + diag_handler.emit_err(DynamicLinkingWithLTO); + return Err(FatalError); + } + + for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() { + let exported_symbols = + cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO"); + { + let _timer = + cgcx.prof.generic_activity("GCC_lto_generate_symbols_below_threshold"); + symbols_below_threshold + .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter)); + } + + let archive_data = unsafe { + Mmap::map(File::open(&path).expect("couldn't open rlib")) + .expect("couldn't map rlib") + }; + let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib"); + let obj_files = archive + .members() + .filter_map(|child| { + child.ok().and_then(|c| { + std::str::from_utf8(c.name()).ok().map(|name| (name.trim(), c)) + }) + }) + .filter(|&(name, _)| looks_like_rust_object_file(name)); + for (name, child) in obj_files { + info!("adding bitcode from {}", name); + let path = tmp_path.path().join(name); + match save_as_file(child.data(&*archive_data).expect("corrupt rlib"), &path) { + Ok(()) => { + let buffer = ModuleBuffer::new(path); + let module = SerializedModule::Local(buffer); + upstream_modules.push((module, CString::new(name).unwrap())); + } + Err(e) => { + diag_handler.emit_err(e); + return Err(FatalError); + } + } + } + } + } + + Ok(LtoData { + 
//symbols_below_threshold, + upstream_modules, + tmp_path, + }) +} + +fn save_as_file(obj: &[u8], path: &Path) -> Result<(), LtoBitcodeFromRlib> { + fs::write(path, obj) + .map_err(|error| LtoBitcodeFromRlib { + gcc_err: format!("write object file to temp dir: {}", error) + }) +} + +/// Performs fat LTO by merging all modules into a single one and returning it +/// for further optimization. +pub(crate) fn run_fat( + cgcx: &CodegenContext<GccCodegenBackend>, + modules: Vec<FatLtoInput<GccCodegenBackend>>, + cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, +) -> Result<LtoModuleCodegen<GccCodegenBackend>, FatalError> { + let diag_handler = cgcx.create_diag_handler(); + let lto_data = prepare_lto(cgcx, &diag_handler)?; + /*let symbols_below_threshold = + lto_data.symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();*/ + fat_lto(cgcx, &diag_handler, modules, cached_modules, lto_data.upstream_modules, lto_data.tmp_path, + //&symbols_below_threshold, + ) +} + +fn fat_lto(cgcx: &CodegenContext<GccCodegenBackend>, _diag_handler: &Handler, modules: Vec<FatLtoInput<GccCodegenBackend>>, cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>, tmp_path: TempDir, + //symbols_below_threshold: &[*const libc::c_char], +) -> Result<LtoModuleCodegen<GccCodegenBackend>, FatalError> { + let _timer = cgcx.prof.generic_activity("GCC_fat_lto_build_monolithic_module"); + info!("going for a fat lto"); + + // Sort out all our lists of incoming modules into two lists. + // + // * `serialized_modules` (also and argument to this function) contains all + // modules that are serialized in-memory. + // * `in_memory` contains modules which are already parsed and in-memory, + // such as from multi-CGU builds. + // + // All of `cached_modules` (cached from previous incremental builds) can + // immediately go onto the `serialized_modules` modules list and then we can + // split the `modules` array into these two lists. + let mut in_memory = Vec::new(); + serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| { + info!("pushing cached module {:?}", wp.cgu_name); + (buffer, CString::new(wp.cgu_name).unwrap()) + })); + for module in modules { + match module { + FatLtoInput::InMemory(m) => in_memory.push(m), + FatLtoInput::Serialized { name, buffer } => { + info!("pushing serialized module {:?}", name); + let buffer = SerializedModule::Local(buffer); + serialized_modules.push((buffer, CString::new(name).unwrap())); + } + } + } + + // Find the "costliest" module and merge everything into that codegen unit. + // All the other modules will be serialized and reparsed into the new + // context, so this hopefully avoids serializing and parsing the largest + // codegen unit. + // + // Additionally use a regular module as the base here to ensure that various + // file copy operations in the backend work correctly. The only other kind + // of module here should be an allocator one, and if your crate is smaller + // than the allocator module then the size doesn't really matter anyway. + let costliest_module = in_memory + .iter() + .enumerate() + .filter(|&(_, module)| module.kind == ModuleKind::Regular) + .map(|(i, _module)| { + //let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) }; + // TODO(antoyo): compute the cost of a module if GCC allows this. + (0, i) + }) + .max(); + + // If we found a costliest module, we're good to go. 
Otherwise all our + // inputs were serialized which could happen in the case, for example, that + // all our inputs were incrementally reread from the cache and we're just + // re-executing the LTO passes. If that's the case deserialize the first + // module and create a linker with it. + let mut module: ModuleCodegen<GccContext> = match costliest_module { + Some((_cost, i)) => in_memory.remove(i), + None => { + unimplemented!("Incremental"); + /*assert!(!serialized_modules.is_empty(), "must have at least one serialized module"); + let (buffer, name) = serialized_modules.remove(0); + info!("no in-memory regular modules to choose from, parsing {:?}", name); + ModuleCodegen { + module_llvm: GccContext::parse(cgcx, &name, buffer.data(), diag_handler)?, + name: name.into_string().unwrap(), + kind: ModuleKind::Regular, + }*/ + } + }; + let mut serialized_bitcode = Vec::new(); + { + info!("using {:?} as a base module", module.name); + + // We cannot load and merge GCC contexts in memory like cg_llvm is doing. + // Instead, we combine the object files into a single object file. + for module in in_memory { + let path = tmp_path.path().to_path_buf().join(&module.name); + let path = path.to_str().expect("path"); + let context = &module.module_llvm.context; + let config = cgcx.config(module.kind); + // NOTE: we need to set the optimization level here in order for LTO to do its job. + context.set_optimization_level(to_gcc_opt_level(config.opt_level)); + context.add_command_line_option("-flto=auto"); + context.add_command_line_option("-flto-partition=one"); + context.compile_to_file(OutputKind::ObjectFile, path); + let buffer = ModuleBuffer::new(PathBuf::from(path)); + let llmod_id = CString::new(&module.name[..]).unwrap(); + serialized_modules.push((SerializedModule::Local(buffer), llmod_id)); + } + // Sort the modules to ensure we produce deterministic results. + serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1)); + + // We add the object files and save in should_combine_object_files that we should combine + // them into a single object file when compiling later. + for (bc_decoded, name) in serialized_modules { + let _timer = cgcx + .prof + .generic_activity_with_arg_recorder("GCC_fat_lto_link_module", |recorder| { + recorder.record_arg(format!("{:?}", name)) + }); + info!("linking {:?}", name); + match bc_decoded { + SerializedModule::Local(ref module_buffer) => { + module.module_llvm.should_combine_object_files = true; + module.module_llvm.context.add_driver_option(module_buffer.0.to_str().expect("path")); + }, + SerializedModule::FromRlib(_) => unimplemented!("from rlib"), + SerializedModule::FromUncompressedFile(_) => unimplemented!("from uncompressed file"), + } + serialized_bitcode.push(bc_decoded); + } + save_temp_bitcode(cgcx, &module, "lto.input"); + + // Internalize everything below threshold to help strip out more modules and such. + /*unsafe { + let ptr = symbols_below_threshold.as_ptr(); + llvm::LLVMRustRunRestrictionPass( + llmod, + ptr as *const *const libc::c_char, + symbols_below_threshold.len() as libc::size_t, + );*/ + save_temp_bitcode(cgcx, &module, "lto.after-restriction"); + //} + } + + // NOTE: save the temporary directory used by LTO so that it gets deleted after linking instead + // of now. 
+ module.module_llvm.temp_dir = Some(tmp_path); + + Ok(LtoModuleCodegen::Fat { module, _serialized_bitcode: serialized_bitcode }) +} + +pub struct ModuleBuffer(PathBuf); + +impl ModuleBuffer { + pub fn new(path: PathBuf) -> ModuleBuffer { + ModuleBuffer(path) + } +} + +impl ModuleBufferMethods for ModuleBuffer { + fn data(&self) -> &[u8] { + unimplemented!("data not needed for GCC codegen"); + } +} diff --git a/compiler/rustc_codegen_gcc/src/back/mod.rs b/compiler/rustc_codegen_gcc/src/back/mod.rs index d692799d764..10187eab0d7 100644 --- a/compiler/rustc_codegen_gcc/src/back/mod.rs +++ b/compiler/rustc_codegen_gcc/src/back/mod.rs @@ -1 +1,2 @@ +pub mod lto; pub mod write; diff --git a/compiler/rustc_codegen_gcc/src/back/write.rs b/compiler/rustc_codegen_gcc/src/back/write.rs index 5f54ac4ebc6..04772d7707a 100644 --- a/compiler/rustc_codegen_gcc/src/back/write.rs +++ b/compiler/rustc_codegen_gcc/src/back/write.rs @@ -2,27 +2,71 @@ use std::{env, fs}; use gccjit::OutputKind; use rustc_codegen_ssa::{CompiledModule, ModuleCodegen}; -use rustc_codegen_ssa::back::write::{CodegenContext, EmitObj, ModuleConfig}; +use rustc_codegen_ssa::back::link::ensure_removed; +use rustc_codegen_ssa::back::write::{BitcodeSection, CodegenContext, EmitObj, ModuleConfig}; use rustc_errors::Handler; +use rustc_fs_util::link_or_copy; use rustc_session::config::OutputType; use rustc_span::fatal_error::FatalError; use rustc_target::spec::SplitDebuginfo; use crate::{GccCodegenBackend, GccContext}; +use crate::errors::CopyBitcode; -pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_handler: &Handler, module: ModuleCodegen<GccContext>, config: &ModuleConfig) -> Result<CompiledModule, FatalError> { - let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_codegen", &*module.name); +pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, diag_handler: &Handler, module: ModuleCodegen<GccContext>, config: &ModuleConfig) -> Result<CompiledModule, FatalError> { + let _timer = cgcx.prof.generic_activity_with_arg("GCC_module_codegen", &*module.name); { let context = &module.module_llvm.context; let module_name = module.name.clone(); + + let should_combine_object_files = module.module_llvm.should_combine_object_files; + let module_name = Some(&module_name[..]); - let _bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name); + // NOTE: Only generate object files with GIMPLE when this environment variable is set for + // now because this requires a particular setup (same gcc/lto1/lto-wrapper commit as libgccjit). 
+ let fat_lto = env::var("EMBED_LTO_BITCODE").as_deref() == Ok("1"); + + let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name); let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name); - if config.bitcode_needed() { + if config.bitcode_needed() && fat_lto { + let _timer = cgcx + .prof + .generic_activity_with_arg("GCC_module_codegen_make_bitcode", &*module.name); + // TODO(antoyo) + /*if let Some(bitcode_filename) = bc_out.file_name() { + cgcx.prof.artifact_size( + "llvm_bitcode", + bitcode_filename.to_string_lossy(), + data.len() as u64, + ); + }*/ + + if config.emit_bc || config.emit_obj == EmitObj::Bitcode { + let _timer = cgcx + .prof + .generic_activity_with_arg("GCC_module_codegen_emit_bitcode", &*module.name); + context.add_command_line_option("-flto=auto"); + context.add_command_line_option("-flto-partition=one"); + context.compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str")); + } + + if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) { + let _timer = cgcx + .prof + .generic_activity_with_arg("GCC_module_codegen_embed_bitcode", &*module.name); + // TODO(antoyo): maybe we should call embed_bitcode to have the proper iOS fixes? + //embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data); + + context.add_command_line_option("-flto=auto"); + context.add_command_line_option("-flto-partition=one"); + context.add_command_line_option("-ffat-lto-objects"); + // TODO(antoyo): Send -plugin/usr/lib/gcc/x86_64-pc-linux-gnu/11.1.0/liblto_plugin.so to linker (this should be done when specifying the appropriate rustc cli argument). + context.compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str")); + } } if config.emit_ir { @@ -32,7 +76,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han if config.emit_asm { let _timer = cgcx .prof - .generic_activity_with_arg("LLVM_module_codegen_emit_asm", &*module.name); + .generic_activity_with_arg("GCC_module_codegen_emit_asm", &*module.name); let path = cgcx.output_filenames.temp_path(OutputType::Assembly, module_name); context.compile_to_file(OutputKind::Assembler, path.to_str().expect("path to str")); } @@ -41,7 +85,7 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han EmitObj::ObjectCode(_) => { let _timer = cgcx .prof - .generic_activity_with_arg("LLVM_module_codegen_emit_obj", &*module.name); + .generic_activity_with_arg("GCC_module_codegen_emit_obj", &*module.name); if env::var("CG_GCCJIT_DUMP_MODULE_NAMES").as_deref() == Ok("1") { println!("Module {}", module.name); } @@ -60,11 +104,36 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han context.set_debug_info(true); context.dump_to_file(path, true); } - context.compile_to_file(OutputKind::ObjectFile, obj_out.to_str().expect("path to str")); + if should_combine_object_files && fat_lto { + context.add_command_line_option("-flto=auto"); + context.add_command_line_option("-flto-partition=one"); + + context.add_driver_option("-Wl,-r"); + // NOTE: we need -nostdlib, otherwise, we get the following error: + // /usr/bin/ld: cannot find -lgcc_s: No such file or directory + context.add_driver_option("-nostdlib"); + // NOTE: without -fuse-linker-plugin, we get the following error: + // lto1: internal compiler error: decompressed stream: Destination buffer is too small + context.add_driver_option("-fuse-linker-plugin"); + + // NOTE: this doesn't actually generate an executable. 
With the above flags, it combines the .o files together in another .o. + context.compile_to_file(OutputKind::Executable, obj_out.to_str().expect("path to str")); + } + else { + context.compile_to_file(OutputKind::ObjectFile, obj_out.to_str().expect("path to str")); + } } EmitObj::Bitcode => { - // TODO(antoyo) + debug!("copying bitcode {:?} to obj {:?}", bc_out, obj_out); + if let Err(err) = link_or_copy(&bc_out, &obj_out) { + diag_handler.emit_err(CopyBitcode { err }); + } + + if !config.emit_bc { + debug!("removing_bitcode {:?}", bc_out); + ensure_removed(diag_handler, &bc_out); + } } EmitObj::None => {} @@ -82,3 +151,18 @@ pub(crate) unsafe fn codegen(cgcx: &CodegenContext<GccCodegenBackend>, _diag_han pub(crate) fn link(_cgcx: &CodegenContext<GccCodegenBackend>, _diag_handler: &Handler, mut _modules: Vec<ModuleCodegen<GccContext>>) -> Result<ModuleCodegen<GccContext>, FatalError> { unimplemented!(); } + +pub(crate) fn save_temp_bitcode(cgcx: &CodegenContext<GccCodegenBackend>, _module: &ModuleCodegen<GccContext>, _name: &str) { + if !cgcx.save_temps { + return; + } + unimplemented!(); + /*unsafe { + let ext = format!("{}.bc", name); + let cgu = Some(&module.name[..]); + let path = cgcx.output_filenames.temp_path_ext(&ext, cgu); + let cstr = path_to_c_string(&path); + let llmod = module.module_llvm.llmod(); + llvm::LLVMWriteBitcodeToFile(llmod, cstr.as_ptr()); + }*/ +} diff --git a/compiler/rustc_codegen_gcc/src/base.rs b/compiler/rustc_codegen_gcc/src/base.rs index 9e614ca4ace..61da38f4b0d 100644 --- a/compiler/rustc_codegen_gcc/src/base.rs +++ b/compiler/rustc_codegen_gcc/src/base.rs @@ -1,3 +1,4 @@ +use std::collections::HashSet; use std::env; use std::time::Instant; @@ -18,6 +19,7 @@ use rustc_codegen_ssa::traits::DebugInfoMethods; use rustc_session::config::DebugInfo; use rustc_span::Symbol; +use crate::{LockedTargetInfo, gcc_util}; use crate::GccContext; use crate::builder::Builder; use crate::context::CodegenCx; @@ -50,6 +52,7 @@ pub fn global_linkage_to_gcc(linkage: Linkage) -> GlobalKind { pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType { match linkage { Linkage::External => FunctionType::Exported, + // TODO(antoyo): set the attribute externally_visible. Linkage::AvailableExternally => FunctionType::Extern, Linkage::LinkOnceAny => unimplemented!(), Linkage::LinkOnceODR => unimplemented!(), @@ -63,7 +66,7 @@ pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType { } } -pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_integers: bool) -> (ModuleCodegen<GccContext>, u64) { +pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: LockedTargetInfo) -> (ModuleCodegen<GccContext>, u64) { let prof_timer = tcx.prof.generic_activity("codegen_module"); let start_time = Instant::now(); @@ -71,7 +74,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i let (module, _) = tcx.dep_graph.with_task( dep_node, tcx, - (cgu_name, supports_128bit_integers), + (cgu_name, target_info), module_codegen, Some(dep_graph::hash_result), ); @@ -82,38 +85,28 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i // the time we needed for codegenning it. 
let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64; - fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, supports_128bit_integers): (Symbol, bool)) -> ModuleCodegen<GccContext> { + fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, target_info): (Symbol, LockedTargetInfo)) -> ModuleCodegen<GccContext> { let cgu = tcx.codegen_unit(cgu_name); // Instantiate monomorphizations without filling out definitions yet... - //let llvm_module = ModuleLlvm::new(tcx, &cgu_name.as_str()); let context = Context::default(); context.add_command_line_option("-fexceptions"); context.add_driver_option("-fexceptions"); + let disabled_features: HashSet<_> = tcx.sess.opts.cg.target_feature.split(',') + .filter(|feature| feature.starts_with('-')) + .map(|string| &string[1..]) + .collect(); + // TODO(antoyo): only set on x86 platforms. context.add_command_line_option("-masm=intel"); - // TODO(antoyo): only add the following cli argument if the feature is supported. - context.add_command_line_option("-msse2"); - context.add_command_line_option("-mavx2"); - // FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU. - // Only add if the CPU supports it. - context.add_command_line_option("-msha"); - context.add_command_line_option("-mpclmul"); - context.add_command_line_option("-mfma"); - context.add_command_line_option("-mfma4"); - context.add_command_line_option("-m64"); - context.add_command_line_option("-mbmi"); - context.add_command_line_option("-mgfni"); - //context.add_command_line_option("-mavxvnni"); // The CI doesn't support this option. - context.add_command_line_option("-mf16c"); - context.add_command_line_option("-maes"); - context.add_command_line_option("-mxsavec"); - context.add_command_line_option("-mbmi2"); - context.add_command_line_option("-mrtm"); - context.add_command_line_option("-mvaes"); - context.add_command_line_option("-mvpclmulqdq"); - context.add_command_line_option("-mavx"); + + if !disabled_features.contains("avx") { + // NOTE: we always enable AVX because the equivalent of llvm.x86.sse2.cmp.pd in GCC for + // SSE2 is multiple builtins, so we use the AVX __builtin_ia32_cmppd instead. + // FIXME(antoyo): use the proper builtins for llvm.x86.sse2.cmp.pd and similar. + context.add_command_line_option("-mavx"); + } for arg in &tcx.sess.opts.cg.llvm_args { context.add_command_line_option(arg); @@ -127,6 +120,16 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i // NOTE: Rust relies on LLVM doing wrapping on overflow. 
context.add_command_line_option("-fwrapv"); + if tcx.sess.opts.cg.relocation_model == Some(rustc_target::spec::RelocModel::Static) { + context.add_command_line_option("-mcmodel=kernel"); + context.add_command_line_option("-fno-pie"); + } + + let target_cpu = gcc_util::target_cpu(tcx.sess); + if target_cpu != "generic" { + context.add_command_line_option(&format!("-march={}", target_cpu)); + } + if tcx.sess.opts.unstable_opts.function_sections.unwrap_or(tcx.sess.target.function_sections) { context.add_command_line_option("-ffunction-sections"); context.add_command_line_option("-fdata-sections"); @@ -135,8 +138,14 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i if env::var("CG_GCCJIT_DUMP_RTL").as_deref() == Ok("1") { context.add_command_line_option("-fdump-rtl-vregs"); } + if env::var("CG_GCCJIT_DUMP_RTL_ALL").as_deref() == Ok("1") { + context.add_command_line_option("-fdump-rtl-all"); + } if env::var("CG_GCCJIT_DUMP_TREE_ALL").as_deref() == Ok("1") { - context.add_command_line_option("-fdump-tree-all"); + context.add_command_line_option("-fdump-tree-all-eh"); + } + if env::var("CG_GCCJIT_DUMP_IPA_ALL").as_deref() == Ok("1") { + context.add_command_line_option("-fdump-ipa-all-eh"); } if env::var("CG_GCCJIT_DUMP_CODE").as_deref() == Ok("1") { context.set_dump_code_on_compile(true); @@ -152,11 +161,15 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i context.set_keep_intermediates(true); } + if env::var("CG_GCCJIT_VERBOSE").as_deref() == Ok("1") { + context.add_driver_option("-v"); + } + // NOTE: The codegen generates unrechable blocks. context.set_allow_unreachable_blocks(true); { - let cx = CodegenCx::new(&context, cgu, tcx, supports_128bit_integers); + let cx = CodegenCx::new(&context, cgu, tcx, target_info.supports_128bit_int()); let mono_items = cgu.items_in_deterministic_order(tcx); for &(mono_item, data) in &mono_items { @@ -181,7 +194,9 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, supports_128bit_i ModuleCodegen { name: cgu_name.to_string(), module_llvm: GccContext { - context + context, + should_combine_object_files: false, + temp_dir: None, }, kind: ModuleKind::Regular, } diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs index ecc293aee23..b7841808934 100644 --- a/compiler/rustc_codegen_gcc/src/builder.rs +++ b/compiler/rustc_codegen_gcc/src/builder.rs @@ -247,16 +247,9 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } fn check_store(&mut self, val: RValue<'gcc>, ptr: RValue<'gcc>) -> RValue<'gcc> { - let dest_ptr_ty = self.cx.val_ty(ptr).make_pointer(); // TODO(antoyo): make sure make_pointer() is okay here. 
let stored_ty = self.cx.val_ty(val); let stored_ptr_ty = self.cx.type_ptr_to(stored_ty); - - if dest_ptr_ty == stored_ptr_ty { - ptr - } - else { - self.bitcast(ptr, stored_ptr_ty) - } + self.bitcast(ptr, stored_ptr_ty) } pub fn current_func(&self) -> Function<'gcc> { @@ -500,7 +493,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { } #[cfg(not(feature="master"))] - fn invoke(&mut self, typ: Type<'gcc>, fn_attrs: &CodegenFnAttrs, fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, func: RValue<'gcc>, args: &[RValue<'gcc>], then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>) -> RValue<'gcc> { + fn invoke(&mut self, typ: Type<'gcc>, fn_attrs: Option<&CodegenFnAttrs>, fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>, func: RValue<'gcc>, args: &[RValue<'gcc>], then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>) -> RValue<'gcc> { let call_site = self.call(typ, fn_attrs, None, func, args, None); let condition = self.context.new_rvalue_from_int(self.bool_type, 1); self.llbb().end_with_conditional(None, condition, then, catch); @@ -663,7 +656,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { } fn unchecked_sadd(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> { - a + b + self.gcc_add(a, b) } fn unchecked_uadd(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> { @@ -671,7 +664,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { } fn unchecked_ssub(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> { - a - b + self.gcc_sub(a, b) } fn unchecked_usub(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> { @@ -680,11 +673,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { } fn unchecked_smul(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> { - a * b + self.gcc_mul(a, b) } fn unchecked_umul(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> { - a * b + self.gcc_mul(a, b) } fn fadd_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> { @@ -916,7 +909,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { .add_eval(None, self.context.new_call(None, atomic_store, &[ptr, value, ordering])); } - fn gep(&mut self, _typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> { + fn gep(&mut self, typ: Type<'gcc>, ptr: RValue<'gcc>, indices: &[RValue<'gcc>]) -> RValue<'gcc> { + // NOTE: due to opaque pointers now being used, we need to cast here. + let ptr = self.context.new_cast(None, ptr, typ.make_pointer()); let ptr_type = ptr.get_type(); let mut pointee_type = ptr.get_type(); // NOTE: we cannot use array indexing here like in inbounds_gep because array indexing is @@ -927,6 +922,12 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { // require dereferencing the pointer. 
for index in indices { pointee_type = pointee_type.get_pointee().expect("pointee type"); + #[cfg(feature="master")] + let pointee_size = { + let size = self.cx.context.new_sizeof(pointee_type); + self.context.new_cast(None, size, index.get_type()) + }; + #[cfg(not(feature="master"))] let pointee_size = self.context.new_rvalue_from_int(index.get_type(), pointee_type.get_size() as i32); result = result + self.gcc_int_cast(*index * pointee_size, self.sizet_type); } diff --git a/compiler/rustc_codegen_gcc/src/declare.rs b/compiler/rustc_codegen_gcc/src/declare.rs index 493626c3cf5..e673d0af4c7 100644 --- a/compiler/rustc_codegen_gcc/src/declare.rs +++ b/compiler/rustc_codegen_gcc/src/declare.rs @@ -1,4 +1,6 @@ use gccjit::{Function, FunctionType, GlobalKind, LValue, RValue, Type}; +#[cfg(feature="master")] +use gccjit::{FnAttribute, ToRValue}; use rustc_codegen_ssa::traits::BaseTypeMethods; use rustc_middle::ty::Ty; use rustc_span::Symbol; @@ -114,6 +116,44 @@ fn declare_raw_fn<'gcc>(cx: &CodegenCx<'gcc, '_>, name: &str, _callconv: () /*ll .collect(); let func = cx.context.new_function(None, cx.linkage.get(), return_type, ¶ms, mangle_name(name), variadic); cx.functions.borrow_mut().insert(name.to_string(), func); + + #[cfg(feature="master")] + if name == "rust_eh_personality" { + // NOTE: GCC will sometimes change the personality function set on a function from + // rust_eh_personality to __gcc_personality_v0 as an optimization. + // As such, we need to create a weak alias from __gcc_personality_v0 to + // rust_eh_personality in order to avoid a linker error. + // This needs to be weak in order to still allow using the standard + // __gcc_personality_v0 when the linking to it. + // Since aliases don't work (maybe because of a bug in LTO partitioning?), we + // create a wrapper function that calls rust_eh_personality. + + let params: Vec<_> = param_types.into_iter().enumerate() + .map(|(index, param)| cx.context.new_parameter(None, *param, &format!("param{}", index))) // TODO(antoyo): set name. + .collect(); + let gcc_func = cx.context.new_function(None, FunctionType::Exported, return_type, ¶ms, "__gcc_personality_v0", variadic); + + // We need a normal extern function for the crates that access rust_eh_personality + // without defining it, otherwise we'll get a compiler error. + // + // For the crate defining it, that needs to be a weak alias instead. 
+ gcc_func.add_attribute(FnAttribute::Weak); + + let block = gcc_func.new_block("start"); + let mut args = vec![]; + for param in ¶ms { + args.push(param.to_rvalue()); + } + let call = cx.context.new_call(None, func, &args); + if return_type == cx.type_void() { + block.add_eval(None, call); + block.end_with_void_return(None); + } + else { + block.end_with_return(None, call); + } + } + func }; diff --git a/compiler/rustc_codegen_gcc/src/errors.rs b/compiler/rustc_codegen_gcc/src/errors.rs index 693367192b1..4bf3b71f503 100644 --- a/compiler/rustc_codegen_gcc/src/errors.rs +++ b/compiler/rustc_codegen_gcc/src/errors.rs @@ -1,8 +1,36 @@ -use rustc_errors::{DiagnosticArgValue, IntoDiagnosticArg}; -use rustc_macros::Diagnostic; +use rustc_errors::{ + DiagnosticArgValue, DiagnosticBuilder, ErrorGuaranteed, Handler, IntoDiagnostic, IntoDiagnosticArg, +}; +use rustc_macros::{Diagnostic, Subdiagnostic}; use rustc_span::Span; use std::borrow::Cow; +use crate::fluent_generated as fluent; + +#[derive(Diagnostic)] +#[diag(codegen_gcc_unknown_ctarget_feature_prefix)] +#[note] +pub(crate) struct UnknownCTargetFeaturePrefix<'a> { + pub feature: &'a str, +} + +#[derive(Diagnostic)] +#[diag(codegen_gcc_unknown_ctarget_feature)] +#[note] +pub(crate) struct UnknownCTargetFeature<'a> { + pub feature: &'a str, + #[subdiagnostic] + pub rust_feature: PossibleFeature<'a>, +} + +#[derive(Subdiagnostic)] +pub(crate) enum PossibleFeature<'a> { + #[help(codegen_gcc_possible_feature)] + Some { rust_feature: &'a str }, + #[help(codegen_gcc_consider_filing_feature_request)] + None, +} + struct ExitCode(Option<i32>); impl IntoDiagnosticArg for ExitCode { @@ -40,3 +68,58 @@ pub(crate) struct TiedTargetFeatures { pub span: Span, pub features: String, } + +#[derive(Diagnostic)] +#[diag(codegen_gcc_copy_bitcode)] +pub(crate) struct CopyBitcode { + pub err: std::io::Error, +} + +#[derive(Diagnostic)] +#[diag(codegen_gcc_dynamic_linking_with_lto)] +#[note] +pub(crate) struct DynamicLinkingWithLTO; + +#[derive(Diagnostic)] +#[diag(codegen_gcc_load_bitcode)] +pub(crate) struct LoadBitcode { + name: String, +} + +#[derive(Diagnostic)] +#[diag(codegen_gcc_lto_disallowed)] +pub(crate) struct LtoDisallowed; + +#[derive(Diagnostic)] +#[diag(codegen_gcc_lto_dylib)] +pub(crate) struct LtoDylib; + +#[derive(Diagnostic)] +#[diag(codegen_gcc_lto_bitcode_from_rlib)] +pub(crate) struct LtoBitcodeFromRlib { + pub gcc_err: String, +} + +pub(crate) struct TargetFeatureDisableOrEnable<'a> { + pub features: &'a [&'a str], + pub span: Option<Span>, + pub missing_features: Option<MissingFeatures>, +} + +#[derive(Subdiagnostic)] +#[help(codegen_gcc_missing_features)] +pub(crate) struct MissingFeatures; + +impl IntoDiagnostic<'_, ErrorGuaranteed> for TargetFeatureDisableOrEnable<'_> { + fn into_diagnostic(self, sess: &'_ Handler) -> DiagnosticBuilder<'_, ErrorGuaranteed> { + let mut diag = sess.struct_err(fluent::codegen_gcc_target_feature_disable_or_enable); + if let Some(span) = self.span { + diag.set_span(span); + }; + if let Some(missing_features) = self.missing_features { + diag.subdiagnostic(missing_features); + } + diag.set_arg("features", self.features.join(", ")); + diag + } +} diff --git a/compiler/rustc_codegen_gcc/src/gcc_util.rs b/compiler/rustc_codegen_gcc/src/gcc_util.rs new file mode 100644 index 00000000000..0514c9988e0 --- /dev/null +++ b/compiler/rustc_codegen_gcc/src/gcc_util.rs @@ -0,0 +1,223 @@ +#[cfg(feature="master")] +use gccjit::Context; +use smallvec::{smallvec, SmallVec}; + +use rustc_codegen_ssa::target_features::{ + 
supported_target_features, tied_target_features, RUSTC_SPECIFIC_FEATURES, +}; +use rustc_data_structures::fx::FxHashMap; +use rustc_middle::bug; +use rustc_session::Session; + +use crate::errors::{PossibleFeature, TargetFeatureDisableOrEnable, UnknownCTargetFeature, UnknownCTargetFeaturePrefix}; + +/// The list of GCC features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`, +/// `--target` and similar). +pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec<String> { + // Features that come earlier are overridden by conflicting features later in the string. + // Typically we'll want more explicit settings to override the implicit ones, so: + // + // * Features from -Ctarget-cpu=*; are overridden by [^1] + // * Features implied by --target; are overridden by + // * Features from -Ctarget-feature; are overridden by + // * function specific features. + // + // [^1]: target-cpu=native is handled here, other target-cpu values are handled implicitly + // through GCC march implementation. + // + // FIXME(nagisa): it isn't clear what's the best interaction between features implied by + // `-Ctarget-cpu` and `--target` are. On one hand, you'd expect CLI arguments to always + // override anything that's implicit, so e.g. when there's no `--target` flag, features implied + // the host target are overridden by `-Ctarget-cpu=*`. On the other hand, what about when both + // `--target` and `-Ctarget-cpu=*` are specified? Both then imply some target features and both + // flags are specified by the user on the CLI. It isn't as clear-cut which order of precedence + // should be taken in cases like these. + let mut features = vec![]; + + // Features implied by an implicit or explicit `--target`. + features.extend( + sess.target + .features + .split(',') + .filter(|v| !v.is_empty() && backend_feature_name(v).is_some()) + .map(String::from), + ); + + // -Ctarget-features + let supported_features = supported_target_features(sess); + let mut featsmap = FxHashMap::default(); + let feats = sess.opts.cg.target_feature + .split(',') + .filter_map(|s| { + let enable_disable = match s.chars().next() { + None => return None, + Some(c @ ('+' | '-')) => c, + Some(_) => { + if diagnostics { + sess.emit_warning(UnknownCTargetFeaturePrefix { feature: s }); + } + return None; + } + }; + + let feature = backend_feature_name(s)?; + // Warn against use of GCC specific feature names on the CLI. + if diagnostics && !supported_features.iter().any(|&(v, _)| v == feature) { + let rust_feature = supported_features.iter().find_map(|&(rust_feature, _)| { + let gcc_features = to_gcc_features(sess, rust_feature); + if gcc_features.contains(&feature) && !gcc_features.contains(&rust_feature) { + Some(rust_feature) + } else { + None + } + }); + let unknown_feature = + if let Some(rust_feature) = rust_feature { + UnknownCTargetFeature { + feature, + rust_feature: PossibleFeature::Some { rust_feature }, + } + } + else { + UnknownCTargetFeature { feature, rust_feature: PossibleFeature::None } + }; + sess.emit_warning(unknown_feature); + } + + if diagnostics { + // FIXME(nagisa): figure out how to not allocate a full hashset here. + featsmap.insert(feature, enable_disable == '+'); + } + + // rustc-specific features do not get passed down to GCC… + if RUSTC_SPECIFIC_FEATURES.contains(&feature) { + return None; + } + // ... otherwise though we run through `to_gcc_features` when + // passing requests down to GCC. 
This means that all in-language + // features also work on the command line instead of having two + // different names when the GCC name and the Rust name differ. + Some(to_gcc_features(sess, feature) + .iter() + .flat_map(|feat| to_gcc_features(sess, feat).into_iter()) + .map(|feature| { + if enable_disable == '-' { + format!("-{}", feature) + } + else { + feature.to_string() + } + }) + .collect::<Vec<_>>(), + ) + }) + .flatten(); + features.extend(feats); + + if diagnostics { + if let Some(f) = check_tied_features(sess, &featsmap) { + sess.emit_err(TargetFeatureDisableOrEnable { + features: f, + span: None, + missing_features: None, + }); + } + } + + features +} + +/// Returns a feature name for the given `+feature` or `-feature` string. +/// +/// Only allows features that are backend specific (i.e. not [`RUSTC_SPECIFIC_FEATURES`].) +fn backend_feature_name(s: &str) -> Option<&str> { + // features must start with a `+` or `-`. + let feature = s.strip_prefix(&['+', '-'][..]).unwrap_or_else(|| { + bug!("target feature `{}` must begin with a `+` or `-`", s); + }); + // Rustc-specific feature requests like `+crt-static` or `-crt-static` + // are not passed down to GCC. + if RUSTC_SPECIFIC_FEATURES.contains(&feature) { + return None; + } + Some(feature) +} + +// To find a list of GCC's names, check https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html +pub fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> { + let arch = if sess.target.arch == "x86_64" { "x86" } else { &*sess.target.arch }; + match (arch, s) { + ("x86", "sse4.2") => smallvec!["sse4.2", "crc32"], + ("x86", "pclmulqdq") => smallvec!["pclmul"], + ("x86", "rdrand") => smallvec!["rdrnd"], + ("x86", "bmi1") => smallvec!["bmi"], + ("x86", "cmpxchg16b") => smallvec!["cx16"], + ("x86", "avx512vaes") => smallvec!["vaes"], + ("x86", "avx512gfni") => smallvec!["gfni"], + ("x86", "avx512vpclmulqdq") => smallvec!["vpclmulqdq"], + // NOTE: seems like GCC requires 'avx512bw' for 'avx512vbmi2'. + ("x86", "avx512vbmi2") => smallvec!["avx512vbmi2", "avx512bw"], + // NOTE: seems like GCC requires 'avx512bw' for 'avx512bitalg'. + ("x86", "avx512bitalg") => smallvec!["avx512bitalg", "avx512bw"], + ("aarch64", "rcpc2") => smallvec!["rcpc-immo"], + ("aarch64", "dpb") => smallvec!["ccpp"], + ("aarch64", "dpb2") => smallvec!["ccdp"], + ("aarch64", "frintts") => smallvec!["fptoint"], + ("aarch64", "fcma") => smallvec!["complxnum"], + ("aarch64", "pmuv3") => smallvec!["perfmon"], + ("aarch64", "paca") => smallvec!["pauth"], + ("aarch64", "pacg") => smallvec!["pauth"], + // Rust ties fp and neon together. In GCC neon implicitly enables fp, + // but we manually enable neon when a feature only implicitly enables fp + ("aarch64", "f32mm") => smallvec!["f32mm", "neon"], + ("aarch64", "f64mm") => smallvec!["f64mm", "neon"], + ("aarch64", "fhm") => smallvec!["fp16fml", "neon"], + ("aarch64", "fp16") => smallvec!["fullfp16", "neon"], + ("aarch64", "jsconv") => smallvec!["jsconv", "neon"], + ("aarch64", "sve") => smallvec!["sve", "neon"], + ("aarch64", "sve2") => smallvec!["sve2", "neon"], + ("aarch64", "sve2-aes") => smallvec!["sve2-aes", "neon"], + ("aarch64", "sve2-sm4") => smallvec!["sve2-sm4", "neon"], + ("aarch64", "sve2-sha3") => smallvec!["sve2-sha3", "neon"], + ("aarch64", "sve2-bitperm") => smallvec!["sve2-bitperm", "neon"], + (_, s) => smallvec![s], + } +} + +// Given a map from target_features to whether they are enabled or disabled, +// ensure only valid combinations are allowed. 
+pub fn check_tied_features(sess: &Session, features: &FxHashMap<&str, bool>) -> Option<&'static [&'static str]> { + for tied in tied_target_features(sess) { + // Tied features must be set to the same value, or not set at all + let mut tied_iter = tied.iter(); + let enabled = features.get(tied_iter.next().unwrap()); + if tied_iter.any(|feature| enabled != features.get(feature)) { + return Some(tied); + } + } + None +} + +fn handle_native(name: &str) -> &str { + if name != "native" { + return name; + } + + #[cfg(feature="master")] + { + // Get the native arch. + let context = Context::default(); + context.get_target_info().arch().unwrap() + .to_str() + .unwrap() + } + #[cfg(not(feature="master"))] + unimplemented!(); +} + +pub fn target_cpu(sess: &Session) -> &str { + match sess.opts.cg.target_cpu { + Some(ref name) => handle_native(name), + None => handle_native(sess.target.cpu.as_ref()), + } +} diff --git a/compiler/rustc_codegen_gcc/src/int.rs b/compiler/rustc_codegen_gcc/src/int.rs index 0cf1204791d..58e0dd56f38 100644 --- a/compiler/rustc_codegen_gcc/src/int.rs +++ b/compiler/rustc_codegen_gcc/src/int.rs @@ -36,7 +36,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { self.cx.context.new_unary_op(None, operation, typ, a) } else { - // TODO(antoyo): use __negdi2 and __negti2 instead? let element_type = typ.dyncast_array().expect("element type"); let values = [ self.cx.context.new_unary_op(None, UnaryOp::BitwiseNegate, element_type, self.low(a)), @@ -52,9 +51,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { self.cx.context.new_unary_op(None, UnaryOp::Minus, a.get_type(), a) } else { - let param_a = self.context.new_parameter(None, a_type, "a"); - let func = self.context.new_function(None, FunctionType::Extern, a_type, &[param_a], "__negti2", false); - self.context.new_call(None, func, &[a]) + self.gcc_add(self.gcc_not(a), self.gcc_int(a_type, 1)) } } @@ -353,23 +350,63 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { (res.dereference(None).to_rvalue(), overflow) } - pub fn gcc_icmp(&self, op: IntPredicate, mut lhs: RValue<'gcc>, mut rhs: RValue<'gcc>) -> RValue<'gcc> { + pub fn gcc_icmp(&mut self, op: IntPredicate, mut lhs: RValue<'gcc>, mut rhs: RValue<'gcc>) -> RValue<'gcc> { let a_type = lhs.get_type(); let b_type = rhs.get_type(); if self.is_non_native_int_type(a_type) || self.is_non_native_int_type(b_type) { - let signed = a_type.is_compatible_with(self.i128_type); - let sign = - if signed { - "" - } - else { - "u" - }; - let func_name = format!("__{}cmpti2", sign); - let param_a = self.context.new_parameter(None, a_type, "a"); - let param_b = self.context.new_parameter(None, b_type, "b"); - let func = self.context.new_function(None, FunctionType::Extern, self.int_type, &[param_a, param_b], func_name, false); - let cmp = self.context.new_call(None, func, &[lhs, rhs]); + // This algorithm is based on compiler-rt's __cmpti2: + // https://github.com/llvm-mirror/compiler-rt/blob/f0745e8476f069296a7c71accedd061dce4cdf79/lib/builtins/cmpti2.c#L21 + let result = self.current_func().new_local(None, self.int_type, "icmp_result"); + let block1 = self.current_func().new_block("block1"); + let block2 = self.current_func().new_block("block2"); + let block3 = self.current_func().new_block("block3"); + let block4 = self.current_func().new_block("block4"); + let block5 = self.current_func().new_block("block5"); + let block6 = self.current_func().new_block("block6"); + let block7 = self.current_func().new_block("block7"); + let block8 = self.current_func().new_block("block8"); + let after = 
self.current_func().new_block("after"); + + let native_int_type = a_type.dyncast_array().expect("get element type"); + // NOTE: cast low to its unsigned type in order to perform a comparison correctly (e.g. + // the sign is only on high). + let unsigned_type = native_int_type.to_unsigned(&self.cx); + + let lhs_low = self.context.new_cast(None, self.low(lhs), unsigned_type); + let rhs_low = self.context.new_cast(None, self.low(rhs), unsigned_type); + + let condition = self.context.new_comparison(None, ComparisonOp::LessThan, self.high(lhs), self.high(rhs)); + self.llbb().end_with_conditional(None, condition, block1, block2); + + block1.add_assignment(None, result, self.context.new_rvalue_zero(self.int_type)); + block1.end_with_jump(None, after); + + let condition = self.context.new_comparison(None, ComparisonOp::GreaterThan, self.high(lhs), self.high(rhs)); + block2.end_with_conditional(None, condition, block3, block4); + + block3.add_assignment(None, result, self.context.new_rvalue_from_int(self.int_type, 2)); + block3.end_with_jump(None, after); + + let condition = self.context.new_comparison(None, ComparisonOp::LessThan, lhs_low, rhs_low); + block4.end_with_conditional(None, condition, block5, block6); + + block5.add_assignment(None, result, self.context.new_rvalue_zero(self.int_type)); + block5.end_with_jump(None, after); + + let condition = self.context.new_comparison(None, ComparisonOp::GreaterThan, lhs_low, rhs_low); + block6.end_with_conditional(None, condition, block7, block8); + + block7.add_assignment(None, result, self.context.new_rvalue_from_int(self.int_type, 2)); + block7.end_with_jump(None, after); + + block8.add_assignment(None, result, self.context.new_rvalue_one(self.int_type)); + block8.end_with_jump(None, after); + + // NOTE: since jumps were added in a place rustc does not expect, the current block in the + // state need to be updated. + self.switch_to_block(after); + + let cmp = result.to_rvalue(); let (op, limit) = match op { IntPredicate::IntEQ => { @@ -546,7 +583,12 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { } pub fn gcc_uint(&self, typ: Type<'gcc>, int: u64) -> RValue<'gcc> { - if self.is_native_int_type_or_bool(typ) { + if typ.is_u128(self) { + // FIXME(antoyo): libgccjit cannot create 128-bit values yet. + let num = self.context.new_rvalue_from_long(self.u64_type, int as i64); + self.gcc_int_cast(num, typ) + } + else if self.is_native_int_type_or_bool(typ) { self.context.new_rvalue_from_long(typ, u64::try_from(int).expect("u64::try_from") as i64) } else { @@ -572,6 +614,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { } } else if typ.is_i128(self) { + // FIXME(antoyo): libgccjit cannot create 128-bit values yet. 
let num = self.context.new_rvalue_from_long(self.u64_type, num as u64 as i64); self.gcc_int_cast(num, typ) } diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs index 438eab78943..e01299d32fd 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs @@ -2254,6 +2254,42 @@ match name { "llvm.hexagon.prefetch" => "__builtin_HEXAGON_prefetch", "llvm.hexagon.vmemcpy" => "__builtin_hexagon_vmemcpy", "llvm.hexagon.vmemset" => "__builtin_hexagon_vmemset", + // loongarch + "llvm.loongarch.asrtgt.d" => "__builtin_loongarch_asrtgt_d", + "llvm.loongarch.asrtle.d" => "__builtin_loongarch_asrtle_d", + "llvm.loongarch.break" => "__builtin_loongarch_break", + "llvm.loongarch.cacop.d" => "__builtin_loongarch_cacop_d", + "llvm.loongarch.cacop.w" => "__builtin_loongarch_cacop_w", + "llvm.loongarch.cpucfg" => "__builtin_loongarch_cpucfg", + "llvm.loongarch.crc.w.b.w" => "__builtin_loongarch_crc_w_b_w", + "llvm.loongarch.crc.w.d.w" => "__builtin_loongarch_crc_w_d_w", + "llvm.loongarch.crc.w.h.w" => "__builtin_loongarch_crc_w_h_w", + "llvm.loongarch.crc.w.w.w" => "__builtin_loongarch_crc_w_w_w", + "llvm.loongarch.crcc.w.b.w" => "__builtin_loongarch_crcc_w_b_w", + "llvm.loongarch.crcc.w.d.w" => "__builtin_loongarch_crcc_w_d_w", + "llvm.loongarch.crcc.w.h.w" => "__builtin_loongarch_crcc_w_h_w", + "llvm.loongarch.crcc.w.w.w" => "__builtin_loongarch_crcc_w_w_w", + "llvm.loongarch.csrrd.d" => "__builtin_loongarch_csrrd_d", + "llvm.loongarch.csrrd.w" => "__builtin_loongarch_csrrd_w", + "llvm.loongarch.csrwr.d" => "__builtin_loongarch_csrwr_d", + "llvm.loongarch.csrwr.w" => "__builtin_loongarch_csrwr_w", + "llvm.loongarch.csrxchg.d" => "__builtin_loongarch_csrxchg_d", + "llvm.loongarch.csrxchg.w" => "__builtin_loongarch_csrxchg_w", + "llvm.loongarch.dbar" => "__builtin_loongarch_dbar", + "llvm.loongarch.ibar" => "__builtin_loongarch_ibar", + "llvm.loongarch.iocsrrd.b" => "__builtin_loongarch_iocsrrd_b", + "llvm.loongarch.iocsrrd.d" => "__builtin_loongarch_iocsrrd_d", + "llvm.loongarch.iocsrrd.h" => "__builtin_loongarch_iocsrrd_h", + "llvm.loongarch.iocsrrd.w" => "__builtin_loongarch_iocsrrd_w", + "llvm.loongarch.iocsrwr.b" => "__builtin_loongarch_iocsrwr_b", + "llvm.loongarch.iocsrwr.d" => "__builtin_loongarch_iocsrwr_d", + "llvm.loongarch.iocsrwr.h" => "__builtin_loongarch_iocsrwr_h", + "llvm.loongarch.iocsrwr.w" => "__builtin_loongarch_iocsrwr_w", + "llvm.loongarch.lddir.d" => "__builtin_loongarch_lddir_d", + "llvm.loongarch.ldpte.d" => "__builtin_loongarch_ldpte_d", + "llvm.loongarch.movfcsr2gr" => "__builtin_loongarch_movfcsr2gr", + "llvm.loongarch.movgr2fcsr" => "__builtin_loongarch_movgr2fcsr", + "llvm.loongarch.syscall" => "__builtin_loongarch_syscall", // mips "llvm.mips.absq.s.ph" => "__builtin_mips_absq_s_ph", "llvm.mips.absq.s.qb" => "__builtin_mips_absq_s_qb", @@ -2954,6 +2990,8 @@ match name { "llvm.nvvm.barrier0.and" => "__nvvm_bar0_and", "llvm.nvvm.barrier0.or" => "__nvvm_bar0_or", "llvm.nvvm.barrier0.popc" => "__nvvm_bar0_popc", + "llvm.nvvm.bf2h.rn" => "__nvvm_bf2h_rn", + "llvm.nvvm.bf2h.rn.ftz" => "__nvvm_bf2h_rn_ftz", "llvm.nvvm.bitcast.d2ll" => "__nvvm_bitcast_d2ll", "llvm.nvvm.bitcast.f2i" => "__nvvm_bitcast_f2i", "llvm.nvvm.bitcast.i2f" => "__nvvm_bitcast_i2f", @@ -3016,8 +3054,6 @@ match name { "llvm.nvvm.div.rz.ftz.f" => "__nvvm_div_rz_ftz_f", "llvm.nvvm.ex2.approx.d" => "__nvvm_ex2_approx_d", "llvm.nvvm.ex2.approx.f" => "__nvvm_ex2_approx_f", - "llvm.nvvm.ex2.approx.f16" 
=> "__nvvm_ex2_approx_f16", - "llvm.nvvm.ex2.approx.f16x2" => "__nvvm_ex2_approx_f16x2", "llvm.nvvm.ex2.approx.ftz.f" => "__nvvm_ex2_approx_ftz_f", "llvm.nvvm.f2bf16.rn" => "__nvvm_f2bf16_rn", "llvm.nvvm.f2bf16.rn.relu" => "__nvvm_f2bf16_rn_relu", @@ -3079,11 +3115,17 @@ match name { "llvm.nvvm.fma.rn.bf16x2" => "__nvvm_fma_rn_bf16x2", "llvm.nvvm.fma.rn.d" => "__nvvm_fma_rn_d", "llvm.nvvm.fma.rn.f" => "__nvvm_fma_rn_f", - "llvm.nvvm.fma.rn.f16" => "__nvvm_fma_rn_f16", - "llvm.nvvm.fma.rn.f16x2" => "__nvvm_fma_rn_f16x2", + "llvm.nvvm.fma.rn.ftz.bf16" => "__nvvm_fma_rn_ftz_bf16", + "llvm.nvvm.fma.rn.ftz.bf16x2" => "__nvvm_fma_rn_ftz_bf16x2", "llvm.nvvm.fma.rn.ftz.f" => "__nvvm_fma_rn_ftz_f", + "llvm.nvvm.fma.rn.ftz.relu.bf16" => "__nvvm_fma_rn_ftz_relu_bf16", + "llvm.nvvm.fma.rn.ftz.relu.bf16x2" => "__nvvm_fma_rn_ftz_relu_bf16x2", + "llvm.nvvm.fma.rn.ftz.sat.bf16" => "__nvvm_fma_rn_ftz_sat_bf16", + "llvm.nvvm.fma.rn.ftz.sat.bf16x2" => "__nvvm_fma_rn_ftz_sat_bf16x2", "llvm.nvvm.fma.rn.relu.bf16" => "__nvvm_fma_rn_relu_bf16", "llvm.nvvm.fma.rn.relu.bf16x2" => "__nvvm_fma_rn_relu_bf16x2", + "llvm.nvvm.fma.rn.sat.bf16" => "__nvvm_fma_rn_sat_bf16", + "llvm.nvvm.fma.rn.sat.bf16x2" => "__nvvm_fma_rn_sat_bf16x2", "llvm.nvvm.fma.rp.d" => "__nvvm_fma_rp_d", "llvm.nvvm.fma.rp.f" => "__nvvm_fma_rp_f", "llvm.nvvm.fma.rp.ftz.f" => "__nvvm_fma_rp_ftz_f", @@ -3094,11 +3136,17 @@ match name { "llvm.nvvm.fmax.bf16x2" => "__nvvm_fmax_bf16x2", "llvm.nvvm.fmax.d" => "__nvvm_fmax_d", "llvm.nvvm.fmax.f" => "__nvvm_fmax_f", - "llvm.nvvm.fmax.f16" => "__nvvm_fmax_f16", - "llvm.nvvm.fmax.f16x2" => "__nvvm_fmax_f16x2", + "llvm.nvvm.fmax.ftz.bf16" => "__nvvm_fmax_ftz_bf16", + "llvm.nvvm.fmax.ftz.bf16x2" => "__nvvm_fmax_ftz_bf16x2", "llvm.nvvm.fmax.ftz.f" => "__nvvm_fmax_ftz_f", + "llvm.nvvm.fmax.ftz.nan.bf16" => "__nvvm_fmax_ftz_nan_bf16", + "llvm.nvvm.fmax.ftz.nan.bf16x2" => "__nvvm_fmax_ftz_nan_bf16x2", "llvm.nvvm.fmax.ftz.nan.f" => "__nvvm_fmax_ftz_nan_f", + "llvm.nvvm.fmax.ftz.nan.xorsign.abs.bf16" => "__nvvm_fmax_ftz_nan_xorsign_abs_bf16", + "llvm.nvvm.fmax.ftz.nan.xorsign.abs.bf16x2" => "__nvvm_fmax_ftz_nan_xorsign_abs_bf16x2", "llvm.nvvm.fmax.ftz.nan.xorsign.abs.f" => "__nvvm_fmax_ftz_nan_xorsign_abs_f", + "llvm.nvvm.fmax.ftz.xorsign.abs.bf16" => "__nvvm_fmax_ftz_xorsign_abs_bf16", + "llvm.nvvm.fmax.ftz.xorsign.abs.bf16x2" => "__nvvm_fmax_ftz_xorsign_abs_bf16x2", "llvm.nvvm.fmax.ftz.xorsign.abs.f" => "__nvvm_fmax_ftz_xorsign_abs_f", "llvm.nvvm.fmax.nan.bf16" => "__nvvm_fmax_nan_bf16", "llvm.nvvm.fmax.nan.bf16x2" => "__nvvm_fmax_nan_bf16x2", @@ -3113,11 +3161,17 @@ match name { "llvm.nvvm.fmin.bf16x2" => "__nvvm_fmin_bf16x2", "llvm.nvvm.fmin.d" => "__nvvm_fmin_d", "llvm.nvvm.fmin.f" => "__nvvm_fmin_f", - "llvm.nvvm.fmin.f16" => "__nvvm_fmin_f16", - "llvm.nvvm.fmin.f16x2" => "__nvvm_fmin_f16x2", + "llvm.nvvm.fmin.ftz.bf16" => "__nvvm_fmin_ftz_bf16", + "llvm.nvvm.fmin.ftz.bf16x2" => "__nvvm_fmin_ftz_bf16x2", "llvm.nvvm.fmin.ftz.f" => "__nvvm_fmin_ftz_f", + "llvm.nvvm.fmin.ftz.nan.bf16" => "__nvvm_fmin_ftz_nan_bf16", + "llvm.nvvm.fmin.ftz.nan.bf16x2" => "__nvvm_fmin_ftz_nan_bf16x2", "llvm.nvvm.fmin.ftz.nan.f" => "__nvvm_fmin_ftz_nan_f", + "llvm.nvvm.fmin.ftz.nan.xorsign.abs.bf16" => "__nvvm_fmin_ftz_nan_xorsign_abs_bf16", + "llvm.nvvm.fmin.ftz.nan.xorsign.abs.bf16x2" => "__nvvm_fmin_ftz_nan_xorsign_abs_bf16x2", "llvm.nvvm.fmin.ftz.nan.xorsign.abs.f" => "__nvvm_fmin_ftz_nan_xorsign_abs_f", + "llvm.nvvm.fmin.ftz.xorsign.abs.bf16" => "__nvvm_fmin_ftz_xorsign_abs_bf16", + "llvm.nvvm.fmin.ftz.xorsign.abs.bf16x2" => 
"__nvvm_fmin_ftz_xorsign_abs_bf16x2", "llvm.nvvm.fmin.ftz.xorsign.abs.f" => "__nvvm_fmin_ftz_xorsign_abs_f", "llvm.nvvm.fmin.nan.bf16" => "__nvvm_fmin_nan_bf16", "llvm.nvvm.fmin.nan.bf16x2" => "__nvvm_fmin_nan_bf16x2", @@ -4213,6 +4267,28 @@ match name { "llvm.r600.read.tgid.x" => "__builtin_r600_read_tgid_x", "llvm.r600.read.tgid.y" => "__builtin_r600_read_tgid_y", "llvm.r600.read.tgid.z" => "__builtin_r600_read_tgid_z", + // riscv + "llvm.riscv.aes32dsi" => "__builtin_riscv_aes32dsi", + "llvm.riscv.aes32dsmi" => "__builtin_riscv_aes32dsmi", + "llvm.riscv.aes32esi" => "__builtin_riscv_aes32esi", + "llvm.riscv.aes32esmi" => "__builtin_riscv_aes32esmi", + "llvm.riscv.aes64ds" => "__builtin_riscv_aes64ds", + "llvm.riscv.aes64dsm" => "__builtin_riscv_aes64dsm", + "llvm.riscv.aes64es" => "__builtin_riscv_aes64es", + "llvm.riscv.aes64esm" => "__builtin_riscv_aes64esm", + "llvm.riscv.aes64im" => "__builtin_riscv_aes64im", + "llvm.riscv.aes64ks1i" => "__builtin_riscv_aes64ks1i", + "llvm.riscv.aes64ks2" => "__builtin_riscv_aes64ks2", + "llvm.riscv.sha512sig0" => "__builtin_riscv_sha512sig0", + "llvm.riscv.sha512sig0h" => "__builtin_riscv_sha512sig0h", + "llvm.riscv.sha512sig0l" => "__builtin_riscv_sha512sig0l", + "llvm.riscv.sha512sig1" => "__builtin_riscv_sha512sig1", + "llvm.riscv.sha512sig1h" => "__builtin_riscv_sha512sig1h", + "llvm.riscv.sha512sig1l" => "__builtin_riscv_sha512sig1l", + "llvm.riscv.sha512sum0" => "__builtin_riscv_sha512sum0", + "llvm.riscv.sha512sum0r" => "__builtin_riscv_sha512sum0r", + "llvm.riscv.sha512sum1" => "__builtin_riscv_sha512sum1", + "llvm.riscv.sha512sum1r" => "__builtin_riscv_sha512sum1r", // s390 "llvm.s390.efpc" => "__builtin_s390_efpc", "llvm.s390.etnd" => "__builtin_tx_nesting_depth", @@ -5912,6 +5988,18 @@ match name { "llvm.x86.avx2.vpdpbuud.256" => "__builtin_ia32_vpdpbuud256", "llvm.x86.avx2.vpdpbuuds.128" => "__builtin_ia32_vpdpbuuds128", "llvm.x86.avx2.vpdpbuuds.256" => "__builtin_ia32_vpdpbuuds256", + "llvm.x86.avx2.vpdpwsud.128" => "__builtin_ia32_vpdpwsud128", + "llvm.x86.avx2.vpdpwsud.256" => "__builtin_ia32_vpdpwsud256", + "llvm.x86.avx2.vpdpwsuds.128" => "__builtin_ia32_vpdpwsuds128", + "llvm.x86.avx2.vpdpwsuds.256" => "__builtin_ia32_vpdpwsuds256", + "llvm.x86.avx2.vpdpwusd.128" => "__builtin_ia32_vpdpwusd128", + "llvm.x86.avx2.vpdpwusd.256" => "__builtin_ia32_vpdpwusd256", + "llvm.x86.avx2.vpdpwusds.128" => "__builtin_ia32_vpdpwusds128", + "llvm.x86.avx2.vpdpwusds.256" => "__builtin_ia32_vpdpwusds256", + "llvm.x86.avx2.vpdpwuud.128" => "__builtin_ia32_vpdpwuud128", + "llvm.x86.avx2.vpdpwuud.256" => "__builtin_ia32_vpdpwuud256", + "llvm.x86.avx2.vpdpwuuds.128" => "__builtin_ia32_vpdpwuuds128", + "llvm.x86.avx2.vpdpwuuds.256" => "__builtin_ia32_vpdpwuuds256", "llvm.x86.avx2.vperm2i128" => "__builtin_ia32_permti256", "llvm.x86.avx512.add.pd.512" => "__builtin_ia32_addpd512", "llvm.x86.avx512.add.ps.512" => "__builtin_ia32_addps512", @@ -7909,6 +7997,16 @@ match name { "llvm.x86.vgf2p8mulb.128" => "__builtin_ia32_vgf2p8mulb_v16qi", "llvm.x86.vgf2p8mulb.256" => "__builtin_ia32_vgf2p8mulb_v32qi", "llvm.x86.vgf2p8mulb.512" => "__builtin_ia32_vgf2p8mulb_v64qi", + "llvm.x86.vsha512msg1" => "__builtin_ia32_vsha512msg1", + "llvm.x86.vsha512msg2" => "__builtin_ia32_vsha512msg2", + "llvm.x86.vsha512rnds2" => "__builtin_ia32_vsha512rnds2", + "llvm.x86.vsm3msg1" => "__builtin_ia32_vsm3msg1", + "llvm.x86.vsm3msg2" => "__builtin_ia32_vsm3msg2", + "llvm.x86.vsm3rnds2" => "__builtin_ia32_vsm3rnds2", + "llvm.x86.vsm4key4128" => "__builtin_ia32_vsm4key4128", + 
"llvm.x86.vsm4key4256" => "__builtin_ia32_vsm4key4256", + "llvm.x86.vsm4rnds4128" => "__builtin_ia32_vsm4rnds4128", + "llvm.x86.vsm4rnds4256" => "__builtin_ia32_vsm4rnds4256", "llvm.x86.wbinvd" => "__builtin_ia32_wbinvd", "llvm.x86.wbnoinvd" => "__builtin_ia32_wbnoinvd", "llvm.x86.wrfsbase.32" => "__builtin_ia32_wrfsbase32", diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs index f28348380d7..5996623bdc5 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs @@ -236,11 +236,17 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let arg2 = builder.context.new_cast(None, arg2, arg2_type); args = vec![new_args[0], arg2].into(); }, + // These builtins are sent one more argument than needed. "__builtin_prefetch" => { let mut new_args = args.to_vec(); new_args.pop(); args = new_args.into(); }, + // The GCC version returns one value of the tuple through a pointer. + "__builtin_ia32_rdrand64_step" => { + let arg = builder.current_func().new_local(None, builder.ulonglong_type, "return_rdrand_arg"); + args = vec![arg.get_address(None)].into(); + }, _ => (), } } @@ -361,6 +367,19 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, // builtin twice, we overwrite the return value with a dummy value. return_value = builder.context.new_rvalue_zero(builder.int_type); }, + "__builtin_ia32_rdrand64_step" => { + let random_number = args[0].dereference(None).to_rvalue(); + let success_variable = builder.current_func().new_local(None, return_value.get_type(), "success"); + builder.llbb().add_assignment(None, success_variable, return_value); + + let field1 = builder.context.new_field(None, random_number.get_type(), "random_number"); + let field2 = builder.context.new_field(None, return_value.get_type(), "success"); + let struct_type = builder.context.new_struct_type(None, "rdrand_result", &[field1, field2]); + return_value = builder.context.new_struct_constructor(None, struct_type.as_type(), None, &[ + random_number, + success_variable.to_rvalue(), + ]); + }, _ => (), } @@ -613,6 +632,7 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function "llvm.fshr.v8i16" => "__builtin_ia32_vpshrdv_v8hi", "llvm.x86.fma.vfmadd.sd" => "__builtin_ia32_vfmaddsd3", "llvm.x86.fma.vfmadd.ss" => "__builtin_ia32_vfmaddss3", + "llvm.x86.rdrand.64" => "__builtin_ia32_rdrand64_step", // The above doc points to unknown builtins for the following, so override them: "llvm.x86.avx2.gather.d.d" => "__builtin_ia32_gathersiv4si", diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs index 68a087a1d7f..9caed459a29 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs @@ -10,9 +10,9 @@ use rustc_codegen_ssa::base::wants_msvc_seh; use rustc_codegen_ssa::common::IntPredicate; use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue}; use rustc_codegen_ssa::mir::place::PlaceRef; -use rustc_codegen_ssa::traits::{ArgAbiMethods, BaseTypeMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods}; +use rustc_codegen_ssa::traits::{ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods}; #[cfg(feature="master")] -use rustc_codegen_ssa::traits::MiscMethods; +use rustc_codegen_ssa::traits::{BaseTypeMethods, MiscMethods}; use rustc_codegen_ssa::errors::InvalidMonomorphization; use rustc_middle::bug; use 
rustc_middle::ty::{self, Instance, Ty};
diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
index ce7e31682f1..fe233930560 100644
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -2,6 +2,12 @@
 * TODO(antoyo): implement equality in libgccjit based on https://zpz.github.io/blog/overloading-equality-operator-in-cpp-class-hierarchy/ (for type equality?)
 * TODO(antoyo): support #[inline] attributes.
 * TODO(antoyo): support LTO (gcc's equivalent to Full LTO is -flto -flto-partition=one — https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html).
+ * For Thin LTO, this might be helpful:
+ * In gcc 4.6 -fwhopr was removed and became the default with -flto. The non-whopr path can still be executed via -flto-partition=none.
+ *
+ * Maybe some missing optimizations enabled by rustc's LTO are in there: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html
+ * Like -fipa-icf (should already be enabled) and maybe -fdevirtualize-at-ltrans.
+ * TODO: disable debug info always being emitted. Perhaps this slows things down?
 *
 * TODO(antoyo): remove the patches.
 */
@@ -28,6 +34,7 @@ extern crate rustc_codegen_ssa;
 extern crate rustc_data_structures;
 extern crate rustc_errors;
 extern crate rustc_fluent_macro;
+extern crate rustc_fs_util;
 extern crate rustc_hir;
 extern crate rustc_macros;
 extern crate rustc_metadata;
@@ -35,7 +42,8 @@ extern crate rustc_middle;
 extern crate rustc_session;
 extern crate rustc_span;
 extern crate rustc_target;
-extern crate tempfile;
+#[macro_use]
+extern crate tracing;

 // This prevents duplicating functions and statics that are already part of the host rustc process.
 #[allow(unused_extern_crates)]
@@ -57,6 +65,7 @@ mod coverageinfo;
 mod debuginfo;
 mod declare;
 mod errors;
+mod gcc_util;
 mod int;
 mod intrinsic;
 mod mono_item;
@@ -64,18 +73,27 @@ mod type_;
 mod type_of;

 use std::any::Any;
-use std::sync::{Arc, Mutex};
-
-use crate::errors::LTONotSupported;
-use gccjit::{Context, OptimizationLevel, CType};
+use std::sync::Arc;
+use std::sync::Mutex;
+#[cfg(not(feature="master"))]
+use std::sync::atomic::AtomicBool;
+#[cfg(not(feature="master"))]
+use std::sync::atomic::Ordering;
+
+use gccjit::{Context, OptimizationLevel};
+#[cfg(feature="master")]
+use gccjit::TargetInfo;
+#[cfg(not(feature="master"))]
+use gccjit::CType;
+use errors::LTONotSupported;
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
 use rustc_codegen_ssa::base::codegen_crate;
 use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn};
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
 use rustc_codegen_ssa::target_features::supported_target_features;
-use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ModuleBufferMethods, ThinBufferMethods, WriteBackendMethods};
 use rustc_data_structures::fx::FxIndexMap;
+use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ThinBufferMethods, WriteBackendMethods};
 use rustc_errors::{DiagnosticMessage, ErrorGuaranteed, Handler, SubdiagnosticMessage};
 use rustc_fluent_macro::fluent_messages;
 use rustc_metadata::EncodedMetadata;
@@ -88,6 +106,9 @@ use rustc_span::Symbol;
 use rustc_span::fatal_error::FatalError;
 use tempfile::TempDir;

+use crate::back::lto::ModuleBuffer;
+use crate::gcc_util::target_cpu;
+
 fluent_messages! 
{ "../messages.ftl" } pub struct PrintOnPanic<F: Fn() -> String>(pub F); @@ -100,9 +121,41 @@ impl<F: Fn() -> String> Drop for PrintOnPanic<F> { } } +#[cfg(not(feature="master"))] +#[derive(Debug)] +pub struct TargetInfo { + supports_128bit_integers: AtomicBool, +} + +#[cfg(not(feature="master"))] +impl TargetInfo { + fn cpu_supports(&self, _feature: &str) -> bool { + false + } + + fn supports_128bit_int(&self) -> bool { + self.supports_128bit_integers.load(Ordering::SeqCst) + } +} + +#[derive(Clone, Debug)] +pub struct LockedTargetInfo { + info: Arc<Mutex<TargetInfo>>, +} + +impl LockedTargetInfo { + fn cpu_supports(&self, feature: &str) -> bool { + self.info.lock().expect("lock").cpu_supports(feature) + } + + fn supports_128bit_int(&self) -> bool { + self.info.lock().expect("lock").supports_128bit_int() + } +} + #[derive(Clone)] pub struct GccCodegenBackend { - supports_128bit_integers: Arc<Mutex<bool>>, + target_info: LockedTargetInfo, } impl CodegenBackend for GccCodegenBackend { @@ -112,24 +165,40 @@ impl CodegenBackend for GccCodegenBackend { fn init(&self, sess: &Session) { #[cfg(feature="master")] + { + let target_cpu = target_cpu(sess); + + // Get the second TargetInfo with the correct CPU features by setting the arch. + let context = Context::default(); + if target_cpu != "generic" { + context.add_command_line_option(&format!("-march={}", target_cpu)); + } + + *self.target_info.info.lock().expect("lock") = context.get_target_info(); + } + + #[cfg(feature="master")] gccjit::set_global_personality_function_name(b"rust_eh_personality\0"); - if sess.lto() != Lto::No { + if sess.lto() == Lto::Thin { sess.emit_warning(LTONotSupported {}); } - let temp_dir = TempDir::new().expect("cannot create temporary directory"); - let temp_file = temp_dir.into_path().join("result.asm"); - let check_context = Context::default(); - check_context.set_print_errors_to_stderr(false); - let _int128_ty = check_context.new_c_type(CType::UInt128t); - // NOTE: we cannot just call compile() as this would require other files than libgccjit.so. - check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str")); - *self.supports_128bit_integers.lock().expect("lock") = check_context.get_last_error() == Ok(None); + #[cfg(not(feature="master"))] + { + let temp_dir = TempDir::new().expect("cannot create temporary directory"); + let temp_file = temp_dir.into_path().join("result.asm"); + let check_context = Context::default(); + check_context.set_print_errors_to_stderr(false); + let _int128_ty = check_context.new_c_type(CType::UInt128t); + // NOTE: we cannot just call compile() as this would require other files than libgccjit.so. 
+ check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str")); + self.target_info.info.lock().expect("lock").supports_128bit_integers.store(check_context.get_last_error() == Ok(None), Ordering::SeqCst); + } } fn provide(&self, providers: &mut Providers) { - // FIXME(antoyo) compute list of enabled features from cli flags - providers.global_backend_features = |_tcx, ()| vec![]; + providers.global_backend_features = + |tcx, ()| gcc_util::global_gcc_features(tcx.sess, true) } fn codegen_crate<'tcx>(&self, tcx: TyCtxt<'tcx>, metadata: EncodedMetadata, need_metadata_module: bool) -> Box<dyn Any> { @@ -160,7 +229,7 @@ impl CodegenBackend for GccCodegenBackend { } fn target_features(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> { - target_features(sess, allow_unstable) + target_features(sess, allow_unstable, &self.target_info) } } @@ -168,13 +237,18 @@ impl ExtraBackendMethods for GccCodegenBackend { fn codegen_allocator<'tcx>(&self, tcx: TyCtxt<'tcx>, module_name: &str, kind: AllocatorKind, alloc_error_handler_kind: AllocatorKind) -> Self::Module { let mut mods = GccContext { context: Context::default(), + should_combine_object_files: false, + temp_dir: None, }; + + // TODO(antoyo): only set for x86. + mods.context.add_command_line_option("-masm=intel"); unsafe { allocator::codegen(tcx, &mut mods, module_name, kind, alloc_error_handler_kind); } mods } fn compile_codegen_unit(&self, tcx: TyCtxt<'_>, cgu_name: Symbol) -> (ModuleCodegen<Self::Module>, u64) { - base::compile_codegen_unit(tcx, cgu_name, *self.supports_128bit_integers.lock().expect("lock")) + base::compile_codegen_unit(tcx, cgu_name, self.target_info.clone()) } fn target_machine_factory(&self, _sess: &Session, _opt_level: OptLevel, _features: &[String]) -> TargetMachineFactoryFn<Self> { @@ -185,14 +259,6 @@ impl ExtraBackendMethods for GccCodegenBackend { } } -pub struct ModuleBuffer; - -impl ModuleBufferMethods for ModuleBuffer { - fn data(&self) -> &[u8] { - unimplemented!(); - } -} - pub struct ThinBuffer; impl ThinBufferMethods for ThinBuffer { @@ -203,6 +269,9 @@ impl ThinBufferMethods for ThinBuffer { pub struct GccContext { context: Context<'static>, + should_combine_object_files: bool, + // Temporary directory used by LTO. We keep it here so that it's not removed before linking. + temp_dir: Option<TempDir>, } unsafe impl Send for GccContext {} @@ -217,18 +286,8 @@ impl WriteBackendMethods for GccCodegenBackend { type ThinData = (); type ThinBuffer = ThinBuffer; - fn run_fat_lto(_cgcx: &CodegenContext<Self>, mut modules: Vec<FatLtoInput<Self>>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> { - // TODO(antoyo): implement LTO by sending -flto to libgccjit and adding the appropriate gcc linker plugins. - // NOTE: implemented elsewhere. - // TODO(antoyo): what is implemented elsewhere ^ ? - let module = - match modules.remove(0) { - FatLtoInput::InMemory(module) => module, - FatLtoInput::Serialized { .. 
} => { - unimplemented!(); - } - }; - Ok(LtoModuleCodegen::Fat { module, _serialized_bitcode: vec![] }) + fn run_fat_lto(cgcx: &CodegenContext<Self>, modules: Vec<FatLtoInput<Self>>, cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> { + back::lto::run_fat(cgcx, modules, cached_modules) } fn run_thin_lto(_cgcx: &CodegenContext<Self>, _modules: Vec<(String, Self::ThinBuffer)>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> { @@ -277,8 +336,19 @@ impl WriteBackendMethods for GccCodegenBackend { /// This is the entrypoint for a hot plugged rustc_codegen_gccjit #[no_mangle] pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> { + #[cfg(feature="master")] + let info = { + // Check whether the target supports 128-bit integers. + let context = Context::default(); + Arc::new(Mutex::new(context.get_target_info())) + }; + #[cfg(not(feature="master"))] + let info = Arc::new(Mutex::new(TargetInfo { + supports_128bit_integers: AtomicBool::new(false), + })); + Box::new(GccCodegenBackend { - supports_128bit_integers: Arc::new(Mutex::new(false)), + target_info: LockedTargetInfo { info }, }) } @@ -297,22 +367,7 @@ fn to_gcc_opt_level(optlevel: Option<OptLevel>) -> OptimizationLevel { } } -fn handle_native(name: &str) -> &str { - if name != "native" { - return name; - } - - unimplemented!(); -} - -pub fn target_cpu(sess: &Session) -> &str { - match sess.opts.cg.target_cpu { - Some(ref name) => handle_native(name), - None => handle_native(sess.target.cpu.as_ref()), - } -} - -pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> { +pub fn target_features(sess: &Session, allow_unstable: bool, target_info: &LockedTargetInfo) -> Vec<Symbol> { supported_target_features(sess) .iter() .filter_map( @@ -321,26 +376,13 @@ pub fn target_features(sess: &Session, allow_unstable: bool) -> Vec<Symbol> { }, ) .filter(|_feature| { - // TODO(antoyo): implement a way to get enabled feature in libgccjit. - // Probably using the equivalent of __builtin_cpu_supports. - // TODO(antoyo): maybe use whatever outputs the following command: - // gcc -march=native -Q --help=target - #[cfg(feature="master")] - { - // NOTE: the CPU in the CI doesn't support sse4a, so disable it to make the stdarch tests pass in the CI. - (_feature.contains("sse") || _feature.contains("avx")) && !_feature.contains("avx512") && !_feature.contains("sse4a") - } - #[cfg(not(feature="master"))] - { - false - } + target_info.cpu_supports(_feature) /* adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512ifma, avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq, bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm, sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves */ - //false }) .map(|feature| Symbol::intern(feature)) .collect() diff --git a/compiler/rustc_codegen_gcc/src/type_of.rs b/compiler/rustc_codegen_gcc/src/type_of.rs index cc467801beb..c2eab295acd 100644 --- a/compiler/rustc_codegen_gcc/src/type_of.rs +++ b/compiler/rustc_codegen_gcc/src/type_of.rs @@ -182,6 +182,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> { /// of that field's type - this is useful for taking the address of /// that field and ensuring the struct has the right alignment. 
fn gcc_type<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc> {
+ use crate::rustc_middle::ty::layout::FnAbiOf;
 // This must produce the same result for `repr(transparent)` wrappers as for the inner type!
 // In other words, this should generally not look at the type at all, but only at the
 // layout.
@@ -191,7 +192,14 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
 if let Some(&ty) = cx.scalar_types.borrow().get(&self.ty) {
 return ty;
 }
- let ty = self.scalar_gcc_type_at(cx, scalar, Size::ZERO);
+ let ty =
+ match *self.ty.kind() {
+ // NOTE: we cannot remove this match like in the LLVM codegen because the call
+ // to fn_ptr_backend_type handles the on-stack attribute.
+ // TODO(antoyo): find a less hackish way to handle the on-stack attribute.
+ ty::FnPtr(sig) => cx.fn_ptr_backend_type(&cx.fn_abi_of_fn_ptr(sig, ty::List::empty())),
+ _ => self.scalar_gcc_type_at(cx, scalar, Size::ZERO),
+ };
 cx.scalar_types.borrow_mut().insert(self.ty, ty);
 return ty;
 }
|
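A note on the gcc_neg fallback in the int.rs hunk above: the old extern call to __negti2 is replaced by the two's-complement identity -a == !a + 1, built from gcc_not and gcc_add. A minimal Rust sketch of that identity (illustration only, not the backend's code):

    // Negation via two's complement: flip every bit, then add one.
    fn neg_via_not(a: i128) -> i128 {
        (!a).wrapping_add(1)
    }

    fn main() {
        for x in [0i128, 1, -42, i128::MAX, i128::MIN + 1] {
            assert_eq!(neg_via_not(x), x.wrapping_neg());
        }
    }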
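The gcc_icmp rewrite in the same hunk lowers comparisons of non-native 128-bit integers into the branch structure of compiler-rt's __cmpti2: compare the signed high halves first, then the unsigned low halves, and produce 0 (less), 1 (equal) or 2 (greater). A standalone Rust model of that control flow (a sketch of the semantics, not the emitted GCC blocks):

    // Mirrors blocks 1-8 above: signed compare on the high half,
    // unsigned compare on the low half, __cmpti2-style result encoding.
    fn cmpti2_model(a: i128, b: i128) -> i32 {
        let (a_high, a_low) = ((a >> 64) as i64, a as u64);
        let (b_high, b_low) = ((b >> 64) as i64, b as u64);
        if a_high < b_high { return 0; } // block1
        if a_high > b_high { return 2; } // block3
        if a_low < b_low { return 0; }   // block5
        if a_low > b_low { return 2; }   // block7
        1                                // block8
    }

    fn main() {
        assert_eq!(cmpti2_model(-1, 0), 0);
        assert_eq!(cmpti2_model(0, 0), 1);
        assert_eq!(cmpti2_model(u64::MAX as i128, 1), 2);
    }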

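Similarly, check_tied_features in the new gcc_util.rs enforces that tied target features (paca/pacg on aarch64, for example) are either all set to the same value or all left unmentioned. A small standalone model of that rule, using std's HashMap in place of FxHashMap (illustration only):

    use std::collections::HashMap;

    // A tied group is valid when every member resolves to the same setting,
    // where "not mentioned" (None) counts as a setting too.
    fn check_tied(tied: &[&str], features: &HashMap<&str, bool>) -> bool {
        let mut iter = tied.iter();
        let first = features.get(iter.next().unwrap());
        iter.all(|feature| features.get(feature) == first)
    }

    fn main() {
        let tied = ["paca", "pacg"];
        let both: HashMap<_, _> = [("paca", true), ("pacg", true)].into_iter().collect();
        let only_one: HashMap<_, _> = [("paca", true)].into_iter().collect();
        assert!(check_tied(&tied, &both));
        assert!(!check_tied(&tied, &only_one)); // would emit TargetFeatureDisableOrEnable
    }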