| author | Guillaume Gomez <guillaume.gomez@huawei.com> | 2024-07-10 12:44:23 +0200 |
|---|---|---|
| committer | Guillaume Gomez <guillaume.gomez@huawei.com> | 2024-07-10 12:44:23 +0200 |
| commit | 7cbe50e2098c35fda06433cd36bbced941607317 (patch) | |
| tree | 5f93154e463e7258902781d746195519e20a9fc6 | /compiler/rustc_codegen_gcc/src |
| parent | 649feb9c1a3c56650a4b6fa638b23103cbcd0dcd (diff) | |
| parent | 98ed962c7d3eebe12c97588e61245273d265e72f (diff) | |
Merge commit '98ed962c7d3eebe12c97588e61245273d265e72f' into master
Diffstat (limited to 'compiler/rustc_codegen_gcc/src')
22 files changed, 1137 insertions, 317 deletions
diff --git a/compiler/rustc_codegen_gcc/src/abi.rs b/compiler/rustc_codegen_gcc/src/abi.rs index b098594dbcc..166dd080cf2 100644 --- a/compiler/rustc_codegen_gcc/src/abi.rs +++ b/compiler/rustc_codegen_gcc/src/abi.rs @@ -4,6 +4,7 @@ use gccjit::{ToLValue, ToRValue, Type}; use rustc_codegen_ssa::traits::{AbiBuilderMethods, BaseTypeMethods}; use rustc_data_structures::fx::FxHashSet; use rustc_middle::bug; +use rustc_middle::ty::layout::LayoutOf; use rustc_middle::ty::Ty; #[cfg(feature = "master")] use rustc_session::config; @@ -184,9 +185,17 @@ impl<'gcc, 'tcx> FnAbiGccExt<'gcc, 'tcx> for FnAbi<'tcx, Ty<'tcx>> { } PassMode::Indirect { attrs, meta_attrs: Some(meta_attrs), on_stack } => { assert!(!on_stack); - let ty = - apply_attrs(cx.type_ptr_to(arg.memory_ty(cx)), &attrs, argument_tys.len()); - apply_attrs(ty, &meta_attrs, argument_tys.len()) + // Construct the type of a (wide) pointer to `ty`, and pass its two fields. + // Any two ABI-compatible unsized types have the same metadata type and + // moreover the same metadata value leads to the same dynamic size and + // alignment, so this respects ABI compatibility. + let ptr_ty = Ty::new_mut_ptr(cx.tcx, arg.layout.ty); + let ptr_layout = cx.layout_of(ptr_ty); + let typ1 = ptr_layout.scalar_pair_element_gcc_type(cx, 0); + let typ2 = ptr_layout.scalar_pair_element_gcc_type(cx, 1); + argument_tys.push(apply_attrs(typ1, &attrs, argument_tys.len())); + argument_tys.push(apply_attrs(typ2, &meta_attrs, argument_tys.len())); + continue; } }; argument_tys.push(arg_ty); diff --git a/compiler/rustc_codegen_gcc/src/asm.rs b/compiler/rustc_codegen_gcc/src/asm.rs index 06b14a1f118..aa485846cd4 100644 --- a/compiler/rustc_codegen_gcc/src/asm.rs +++ b/compiler/rustc_codegen_gcc/src/asm.rs @@ -115,7 +115,7 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { span: &[Span], instance: Instance<'_>, dest: Option<Self::BasicBlock>, - _catch_funclet: Option<(Self::BasicBlock, Option<&Self::Funclet>)>, + _dest_catch_funclet: Option<(Self::BasicBlock, Option<&Self::Funclet>)>, ) { if options.contains(InlineAsmOptions::MAY_UNWIND) { self.sess().dcx().create_err(UnwindingInlineAsm { span: span[0] }).emit(); @@ -485,9 +485,8 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { } InlineAsmOperandRef::Label { label } => { - let label_gcc_index = labels.iter() - .position(|&l| l == label) - .expect("wrong rust index"); + let label_gcc_index = + labels.iter().position(|&l| l == label).expect("wrong rust index"); let gcc_index = label_gcc_index + outputs.len() + inputs.len(); push_to_template(Some('l'), gcc_index); } @@ -538,9 +537,8 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { } if dest.is_none() && options.contains(InlineAsmOptions::NORETURN) { let builtin_unreachable = self.context.get_builtin_function("__builtin_unreachable"); - let builtin_unreachable: RValue<'gcc> = unsafe { - std::mem::transmute(builtin_unreachable) - }; + let builtin_unreachable: RValue<'gcc> = + unsafe { std::mem::transmute(builtin_unreachable) }; self.call(self.type_void(), None, None, builtin_unreachable, &[], None, None); } @@ -696,10 +694,12 @@ fn reg_to_gcc(reg: InlineAsmRegOrRegClass) -> ConstraintOrRegister { fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegClass) -> Type<'gcc> { match reg { InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::reg) => cx.type_i32(), - InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => unimplemented!(), 
InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg) | InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16) => { - unimplemented!() + cx.type_vector(cx.type_i64(), 2) + } + InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => { + unreachable!("clobber-only") } InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => cx.type_i32(), InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg) @@ -710,21 +710,13 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg) | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low8) | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::qreg_low4) => { - unimplemented!() + cx.type_vector(cx.type_i64(), 2) } - InlineAsmRegClass::Avr(_) => unimplemented!(), - InlineAsmRegClass::Bpf(_) => unimplemented!(), InlineAsmRegClass::Hexagon(HexagonInlineAsmRegClass::reg) => cx.type_i32(), InlineAsmRegClass::LoongArch(LoongArchInlineAsmRegClass::reg) => cx.type_i32(), InlineAsmRegClass::LoongArch(LoongArchInlineAsmRegClass::freg) => cx.type_f32(), - InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg) => cx.type_i32(), - InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_addr) => cx.type_i32(), - InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_data) => cx.type_i32(), - InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::reg) => cx.type_i32(), - InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::freg) => cx.type_f32(), InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg) => cx.type_i32(), InlineAsmRegClass::Mips(MipsInlineAsmRegClass::freg) => cx.type_f32(), - InlineAsmRegClass::Msp430(_) => unimplemented!(), InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg16) => cx.type_i16(), InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg32) => cx.type_i32(), InlineAsmRegClass::Nvptx(NvptxInlineAsmRegClass::reg64) => cx.type_i64(), @@ -737,26 +729,43 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl } InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::reg) => cx.type_i32(), InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::freg) => cx.type_f32(), - InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::vreg) => cx.type_f32(), + InlineAsmRegClass::RiscV(RiscVInlineAsmRegClass::vreg) => { + unreachable!("clobber-only") + } InlineAsmRegClass::X86(X86InlineAsmRegClass::reg) | InlineAsmRegClass::X86(X86InlineAsmRegClass::reg_abcd) => cx.type_i32(), InlineAsmRegClass::X86(X86InlineAsmRegClass::reg_byte) => cx.type_i8(), - InlineAsmRegClass::X86(X86InlineAsmRegClass::mmx_reg) => unimplemented!(), InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg) | InlineAsmRegClass::X86(X86InlineAsmRegClass::ymm_reg) | InlineAsmRegClass::X86(X86InlineAsmRegClass::zmm_reg) => cx.type_f32(), - InlineAsmRegClass::X86(X86InlineAsmRegClass::x87_reg) => unimplemented!(), InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg) => cx.type_i16(), - InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg0) => cx.type_i16(), - InlineAsmRegClass::X86(X86InlineAsmRegClass::tmm_reg) => unimplemented!(), - InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => cx.type_i32(), - InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => { - bug!("LLVM backend does not support SPIR-V") + InlineAsmRegClass::X86(X86InlineAsmRegClass::x87_reg) + | InlineAsmRegClass::X86(X86InlineAsmRegClass::mmx_reg) + | InlineAsmRegClass::X86(X86InlineAsmRegClass::kreg0) + | InlineAsmRegClass::X86(X86InlineAsmRegClass::tmm_reg) => { + unreachable!("clobber-only") } + InlineAsmRegClass::Wasm(WasmInlineAsmRegClass::local) => 
cx.type_i32(), + InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::reg) => cx.type_i64(), + InlineAsmRegClass::Bpf(BpfInlineAsmRegClass::wreg) => cx.type_i32(), + InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg) => cx.type_i8(), + InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_upper) => cx.type_i8(), + InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_pair) => cx.type_i16(), + InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_iw) => cx.type_i16(), + InlineAsmRegClass::Avr(AvrInlineAsmRegClass::reg_ptr) => cx.type_i16(), InlineAsmRegClass::S390x( S390xInlineAsmRegClass::reg | S390xInlineAsmRegClass::reg_addr, ) => cx.type_i32(), InlineAsmRegClass::S390x(S390xInlineAsmRegClass::freg) => cx.type_f64(), + InlineAsmRegClass::Msp430(Msp430InlineAsmRegClass::reg) => cx.type_i16(), + InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg) => cx.type_i32(), + InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_addr) => cx.type_i32(), + InlineAsmRegClass::M68k(M68kInlineAsmRegClass::reg_data) => cx.type_i32(), + InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::reg) => cx.type_i32(), + InlineAsmRegClass::CSKY(CSKYInlineAsmRegClass::freg) => cx.type_f32(), + InlineAsmRegClass::SpirV(SpirVInlineAsmRegClass::reg) => { + bug!("GCC backend does not support SPIR-V") + } InlineAsmRegClass::Err => unreachable!(), } } diff --git a/compiler/rustc_codegen_gcc/src/attributes.rs b/compiler/rustc_codegen_gcc/src/attributes.rs index 8602566ab8f..27f21107eda 100644 --- a/compiler/rustc_codegen_gcc/src/attributes.rs +++ b/compiler/rustc_codegen_gcc/src/attributes.rs @@ -92,7 +92,7 @@ pub fn from_fn_attrs<'gcc, 'tcx>( let mut function_features = function_features .iter() .flat_map(|feat| to_gcc_features(cx.tcx.sess, feat).into_iter()) - .chain(codegen_fn_attrs.instruction_set.iter().map(|x| match x { + .chain(codegen_fn_attrs.instruction_set.iter().map(|x| match *x { InstructionSetAttr::ArmA32 => "-thumb-mode", // TODO(antoyo): support removing feature. InstructionSetAttr::ArmT32 => "thumb-mode", })) @@ -118,8 +118,8 @@ pub fn from_fn_attrs<'gcc, 'tcx>( if feature.starts_with('-') { Some(format!("no{}", feature)) - } else if feature.starts_with('+') { - Some(feature[1..].to_string()) + } else if let Some(stripped) = feature.strip_prefix('+') { + Some(stripped.to_string()) } else { Some(feature.to_string()) } @@ -128,6 +128,12 @@ pub fn from_fn_attrs<'gcc, 'tcx>( .join(","); if !target_features.is_empty() { #[cfg(feature = "master")] - func.add_attribute(FnAttribute::Target(&target_features)); + match cx.sess().target.arch.as_ref() { + "x86" | "x86_64" | "powerpc" => { + func.add_attribute(FnAttribute::Target(&target_features)) + } + // The target attribute is not supported on other targets in GCC. 
+ _ => (), + } } } diff --git a/compiler/rustc_codegen_gcc/src/back/lto.rs b/compiler/rustc_codegen_gcc/src/back/lto.rs index ec70fbdddb0..6b2dbbbed67 100644 --- a/compiler/rustc_codegen_gcc/src/back/lto.rs +++ b/compiler/rustc_codegen_gcc/src/back/lto.rs @@ -16,13 +16,14 @@ // /usr/bin/ld: warning: type of symbol `_RNvNvNvNtCs5JWOrf9uCus_5rayon11thread_pool19WORKER_THREAD_STATE7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o // /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o // /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization -use std::ffi::CString; +use std::ffi::{CStr, CString}; use std::fs::{self, File}; use std::path::{Path, PathBuf}; +use std::sync::Arc; -use gccjit::OutputKind; +use gccjit::{Context, OutputKind}; use object::read::archive::ArchiveFile; -use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule}; +use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared}; use rustc_codegen_ssa::back::symbol_export; use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput}; use rustc_codegen_ssa::traits::*; @@ -30,6 +31,7 @@ use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind}; use rustc_data_structures::memmap::Mmap; use rustc_errors::{DiagCtxtHandle, FatalError}; use rustc_hir::def_id::LOCAL_CRATE; +use rustc_middle::bug; use rustc_middle::dep_graph::WorkProduct; use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel}; use rustc_session::config::{CrateType, Lto}; @@ -37,7 +39,7 @@ use tempfile::{tempdir, TempDir}; use crate::back::write::save_temp_bitcode; use crate::errors::{DynamicLinkingWithLTO, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib}; -use crate::{to_gcc_opt_level, GccCodegenBackend, GccContext}; +use crate::{to_gcc_opt_level, GccCodegenBackend, GccContext, SyncContext}; /// We keep track of the computed LTO cache keys from the previous /// session to determine which CGUs we can reuse. @@ -128,8 +130,7 @@ fn prepare_lto( } let archive_data = unsafe { - Mmap::map(File::open(&path).expect("couldn't open rlib")) - .expect("couldn't map rlib") + Mmap::map(File::open(path).expect("couldn't open rlib")).expect("couldn't map rlib") }; let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib"); let obj_files = archive @@ -349,6 +350,395 @@ impl ModuleBuffer { impl ModuleBufferMethods for ModuleBuffer { fn data(&self) -> &[u8] { - unimplemented!("data not needed for GCC codegen"); + &[] } } + +/// Performs thin LTO by performing necessary global analysis and returning two +/// lists, one of the modules that need optimization and another for modules that +/// can simply be copied over from the incr. comp. cache. 
+pub(crate) fn run_thin( + cgcx: &CodegenContext<GccCodegenBackend>, + modules: Vec<(String, ThinBuffer)>, + cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, +) -> Result<(Vec<LtoModuleCodegen<GccCodegenBackend>>, Vec<WorkProduct>), FatalError> { + let dcx = cgcx.create_dcx(); + let dcx = dcx.handle(); + let lto_data = prepare_lto(cgcx, dcx)?; + /*let symbols_below_threshold = + symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();*/ + if cgcx.opts.cg.linker_plugin_lto.enabled() { + unreachable!( + "We should never reach this case if the LTO step \ + is deferred to the linker" + ); + } + thin_lto( + cgcx, + dcx, + modules, + lto_data.upstream_modules, + lto_data.tmp_path, + cached_modules, /*, &symbols_below_threshold*/ + ) +} + +pub(crate) fn prepare_thin( + module: ModuleCodegen<GccContext>, + _emit_summary: bool, +) -> (String, ThinBuffer) { + let name = module.name; + //let buffer = ThinBuffer::new(module.module_llvm.context, true, emit_summary); + let buffer = ThinBuffer::new(&module.module_llvm.context); + (name, buffer) +} + +/// Prepare "thin" LTO to get run on these modules. +/// +/// The general structure of ThinLTO is quite different from the structure of +/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into +/// one giant LLVM module, and then we run more optimization passes over this +/// big module after internalizing most symbols. Thin LTO, on the other hand, +/// avoid this large bottleneck through more targeted optimization. +/// +/// At a high level Thin LTO looks like: +/// +/// 1. Prepare a "summary" of each LLVM module in question which describes +/// the values inside, cost of the values, etc. +/// 2. Merge the summaries of all modules in question into one "index" +/// 3. Perform some global analysis on this index +/// 4. For each module, use the index and analysis calculated previously to +/// perform local transformations on the module, for example inlining +/// small functions from other modules. +/// 5. Run thin-specific optimization passes over each module, and then code +/// generate everything at the end. +/// +/// The summary for each module is intended to be quite cheap, and the global +/// index is relatively quite cheap to create as well. As a result, the goal of +/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more +/// situations. For example one cheap optimization is that we can parallelize +/// all codegen modules, easily making use of all the cores on a machine. +/// +/// With all that in mind, the function here is designed at specifically just +/// calculating the *index* for ThinLTO. This index will then be shared amongst +/// all of the `LtoModuleCodegen` units returned below and destroyed once +/// they all go out of scope. 
+fn thin_lto( + cgcx: &CodegenContext<GccCodegenBackend>, + _dcx: DiagCtxtHandle<'_>, + modules: Vec<(String, ThinBuffer)>, + serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>, + tmp_path: TempDir, + cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, + //symbols_below_threshold: &[*const libc::c_char], +) -> Result<(Vec<LtoModuleCodegen<GccCodegenBackend>>, Vec<WorkProduct>), FatalError> { + let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis"); + info!("going for that thin, thin LTO"); + + /*let green_modules: FxHashMap<_, _> = + cached_modules.iter().map(|(_, wp)| (wp.cgu_name.clone(), wp.clone())).collect();*/ + + let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len(); + let mut thin_buffers = Vec::with_capacity(modules.len()); + let mut module_names = Vec::with_capacity(full_scope_len); + //let mut thin_modules = Vec::with_capacity(full_scope_len); + + for (i, (name, buffer)) in modules.into_iter().enumerate() { + info!("local module: {} - {}", i, name); + let cname = CString::new(name.as_bytes()).unwrap(); + /*thin_modules.push(llvm::ThinLTOModule { + identifier: cname.as_ptr(), + data: buffer.data().as_ptr(), + len: buffer.data().len(), + });*/ + thin_buffers.push(buffer); + module_names.push(cname); + } + + // FIXME: All upstream crates are deserialized internally in the + // function below to extract their summary and modules. Note that + // unlike the loop above we *must* decode and/or read something + // here as these are all just serialized files on disk. An + // improvement, however, to make here would be to store the + // module summary separately from the actual module itself. Right + // now this is store in one large bitcode file, and the entire + // file is deflate-compressed. We could try to bypass some of the + // decompression by storing the index uncompressed and only + // lazily decompressing the bytecode if necessary. + // + // Note that truly taking advantage of this optimization will + // likely be further down the road. We'd have to implement + // incremental ThinLTO first where we could actually avoid + // looking at upstream modules entirely sometimes (the contents, + // we must always unconditionally look at the index). + let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len()); + + let cached_modules = + cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap())); + + for (module, name) in serialized_modules.into_iter().chain(cached_modules) { + info!("upstream or cached module {:?}", name); + /*thin_modules.push(llvm::ThinLTOModule { + identifier: name.as_ptr(), + data: module.data().as_ptr(), + len: module.data().len(), + });*/ + + match module { + SerializedModule::Local(_) => { + //let path = module_buffer.0.to_str().expect("path"); + //let my_path = PathBuf::from(path); + //let exists = my_path.exists(); + /*module.module_llvm.should_combine_object_files = true; + module + .module_llvm + .context + .add_driver_option(module_buffer.0.to_str().expect("path"));*/ + } + SerializedModule::FromRlib(_) => unimplemented!("from rlib"), + SerializedModule::FromUncompressedFile(_) => { + unimplemented!("from uncompressed file") + } + } + + serialized.push(module); + module_names.push(name); + } + + // Sanity check + //assert_eq!(thin_modules.len(), module_names.len()); + + // Delegate to the C++ bindings to create some data here. 
Once this is a + // tried-and-true interface we may wish to try to upstream some of this + // to LLVM itself, right now we reimplement a lot of what they do + // upstream... + /*let data = llvm::LLVMRustCreateThinLTOData( + thin_modules.as_ptr(), + thin_modules.len() as u32, + symbols_below_threshold.as_ptr(), + symbols_below_threshold.len() as u32, + ) + .ok_or_else(|| write::llvm_err(dcx, LlvmError::PrepareThinLtoContext))?; + */ + + let data = ThinData; //(Arc::new(tmp_path))/*(data)*/; + + info!("thin LTO data created"); + + /*let (key_map_path, prev_key_map, curr_key_map) = + if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir { + let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME); + // If the previous file was deleted, or we get an IO error + // reading the file, then we'll just use `None` as the + // prev_key_map, which will force the code to be recompiled. + let prev = + if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None }; + let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names); + (Some(path), prev, curr) + } + else { + // If we don't compile incrementally, we don't need to load the + // import data from LLVM. + assert!(green_modules.is_empty()); + let curr = ThinLTOKeysMap::default(); + (None, None, curr) + }; + info!("thin LTO cache key map loaded"); + info!("prev_key_map: {:#?}", prev_key_map); + info!("curr_key_map: {:#?}", curr_key_map);*/ + + // Throw our data in an `Arc` as we'll be sharing it across threads. We + // also put all memory referenced by the C++ data (buffers, ids, etc) + // into the arc as well. After this we'll create a thin module + // codegen per module in this data. + let shared = + Arc::new(ThinShared { data, thin_buffers, serialized_modules: serialized, module_names }); + + let copy_jobs = vec![]; + let mut opt_jobs = vec![]; + + info!("checking which modules can be-reused and which have to be re-optimized."); + for (module_index, module_name) in shared.module_names.iter().enumerate() { + let module_name = module_name_to_str(module_name); + /*if let (Some(prev_key_map), true) = + (prev_key_map.as_ref(), green_modules.contains_key(module_name)) + { + assert!(cgcx.incr_comp_session_dir.is_some()); + + // If a module exists in both the current and the previous session, + // and has the same LTO cache key in both sessions, then we can re-use it + if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) { + let work_product = green_modules[module_name].clone(); + copy_jobs.push(work_product); + info!(" - {}: re-used", module_name); + assert!(cgcx.incr_comp_session_dir.is_some()); + continue; + } + }*/ + + info!(" - {}: re-compiled", module_name); + opt_jobs + .push(LtoModuleCodegen::Thin(ThinModule { shared: shared.clone(), idx: module_index })); + } + + // Save the current ThinLTO import information for the next compilation + // session, overwriting the previous serialized data (if any). + /*if let Some(path) = key_map_path { + if let Err(err) = curr_key_map.save_to_file(&path) { + return Err(write::llvm_err(dcx, LlvmError::WriteThinLtoKey { err })); + } + }*/ + + // NOTE: save the temporary directory used by LTO so that it gets deleted after linking instead + // of now. + //module.module_llvm.temp_dir = Some(tmp_path); + // TODO: save the directory so that it gets deleted later. 
+ std::mem::forget(tmp_path); + + Ok((opt_jobs, copy_jobs)) +} + +pub unsafe fn optimize_thin_module( + thin_module: ThinModule<GccCodegenBackend>, + _cgcx: &CodegenContext<GccCodegenBackend>, +) -> Result<ModuleCodegen<GccContext>, FatalError> { + //let dcx = cgcx.create_dcx(); + + //let module_name = &thin_module.shared.module_names[thin_module.idx]; + /*let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, module_name.to_str().unwrap()); + let tm = (cgcx.tm_factory)(tm_factory_config).map_err(|e| write::llvm_err(&dcx, e))?;*/ + + // Right now the implementation we've got only works over serialized + // modules, so we create a fresh new LLVM context and parse the module + // into that context. One day, however, we may do this for upstream + // crates but for locally codegened modules we may be able to reuse + // that LLVM Context and Module. + //let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names); + //let llmod_raw = parse_module(llcx, module_name, thin_module.data(), &dcx)? as *const _; + let mut should_combine_object_files = false; + let context = match thin_module.shared.thin_buffers.get(thin_module.idx) { + Some(thin_buffer) => Arc::clone(&thin_buffer.context), + None => { + let context = Context::default(); + let len = thin_module.shared.thin_buffers.len(); + let module = &thin_module.shared.serialized_modules[thin_module.idx - len]; + match *module { + SerializedModule::Local(ref module_buffer) => { + let path = module_buffer.0.to_str().expect("path"); + context.add_driver_option(path); + should_combine_object_files = true; + /*module.module_llvm.should_combine_object_files = true; + module + .module_llvm + .context + .add_driver_option(module_buffer.0.to_str().expect("path"));*/ + } + SerializedModule::FromRlib(_) => unimplemented!("from rlib"), + SerializedModule::FromUncompressedFile(_) => { + unimplemented!("from uncompressed file") + } + } + Arc::new(SyncContext::new(context)) + } + }; + let module = ModuleCodegen { + module_llvm: GccContext { context, should_combine_object_files, temp_dir: None }, + name: thin_module.name().to_string(), + kind: ModuleKind::Regular, + }; + /*{ + let target = &*module.module_llvm.tm; + let llmod = module.module_llvm.llmod(); + save_temp_bitcode(cgcx, &module, "thin-lto-input"); + + // Up next comes the per-module local analyses that we do for Thin LTO. + // Each of these functions is basically copied from the LLVM + // implementation and then tailored to suit this implementation. Ideally + // each of these would be supported by upstream LLVM but that's perhaps + // a patch for another day! 
+ // + // You can find some more comments about these functions in the LLVM + // bindings we've got (currently `PassWrapper.cpp`) + { + let _timer = + cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name()); + if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) { + return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule)); + } + save_temp_bitcode(cgcx, &module, "thin-lto-after-rename"); + } + + { + let _timer = cgcx + .prof + .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name()); + if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) { + return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule)); + } + save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve"); + } + + { + let _timer = cgcx + .prof + .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name()); + if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) { + return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule)); + } + save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize"); + } + + { + let _timer = + cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name()); + if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) { + return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule)); + } + save_temp_bitcode(cgcx, &module, "thin-lto-after-import"); + } + + // Alright now that we've done everything related to the ThinLTO + // analysis it's time to run some optimizations! Here we use the same + // `run_pass_manager` as the "fat" LTO above except that we tell it to + // populate a thin-specific pass manager, which presumably LLVM treats a + // little differently. + { + info!("running thin lto passes over {}", module.name); + run_pass_manager(cgcx, &dcx, &mut module, true)?; + save_temp_bitcode(cgcx, &module, "thin-lto-after-pm"); + } + }*/ + Ok(module) +} + +pub struct ThinBuffer { + context: Arc<SyncContext>, +} + +// TODO: check if this makes sense to make ThinBuffer Send and Sync. +unsafe impl Send for ThinBuffer {} +unsafe impl Sync for ThinBuffer {} + +impl ThinBuffer { + pub(crate) fn new(context: &Arc<SyncContext>) -> Self { + Self { context: Arc::clone(context) } + } +} + +impl ThinBufferMethods for ThinBuffer { + fn data(&self) -> &[u8] { + &[] + } + + fn thin_link_data(&self) -> &[u8] { + unimplemented!(); + } +} + +pub struct ThinData; //(Arc<TempDir>); + +fn module_name_to_str(c_str: &CStr) -> &str { + c_str.to_str().unwrap_or_else(|e| { + bug!("Encountered non-utf8 GCC module name `{}`: {}", c_str.to_string_lossy(), e) + }) +} diff --git a/compiler/rustc_codegen_gcc/src/back/write.rs b/compiler/rustc_codegen_gcc/src/back/write.rs index b9c7f72d0b7..802968979c7 100644 --- a/compiler/rustc_codegen_gcc/src/back/write.rs +++ b/compiler/rustc_codegen_gcc/src/back/write.rs @@ -31,6 +31,7 @@ pub(crate) unsafe fn codegen( // NOTE: Only generate object files with GIMPLE when this environment variable is set for // now because this requires a particular setup (same gcc/lto1/lto-wrapper commit as libgccjit). + // TODO: remove this environment variable. 
let fat_lto = env::var("EMBED_LTO_BITCODE").as_deref() == Ok("1"); let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name); @@ -56,6 +57,8 @@ pub(crate) unsafe fn codegen( .generic_activity_with_arg("GCC_module_codegen_emit_bitcode", &*module.name); context.add_command_line_option("-flto=auto"); context.add_command_line_option("-flto-partition=one"); + // TODO: remove since we don't want fat objects when it is for Bitcode only. + context.add_command_line_option("-ffat-lto-objects"); context .compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str")); } @@ -104,7 +107,7 @@ pub(crate) unsafe fn codegen( // FIXME(antoyo): segfault in dump_reproducer_to_file() might be caused by // transmuting an rvalue to an lvalue. // Segfault is actually in gcc::jit::reproducer::get_identifier_as_lvalue - context.dump_reproducer_to_file(&format!("/tmp/reproducers/{}.c", module.name)); + context.dump_reproducer_to_file(format!("/tmp/reproducers/{}.c", module.name)); println!("Dumped reproducer {}", module.name); } if env::var("CG_GCCJIT_DUMP_TO_FILE").as_deref() == Ok("1") { @@ -113,17 +116,20 @@ pub(crate) unsafe fn codegen( context.set_debug_info(true); context.dump_to_file(path, true); } - if should_combine_object_files && fat_lto { - context.add_command_line_option("-flto=auto"); - context.add_command_line_option("-flto-partition=one"); + if should_combine_object_files { + if fat_lto { + context.add_command_line_option("-flto=auto"); + context.add_command_line_option("-flto-partition=one"); + + // NOTE: without -fuse-linker-plugin, we get the following error: + // lto1: internal compiler error: decompressed stream: Destination buffer is too small + context.add_driver_option("-fuse-linker-plugin"); + } context.add_driver_option("-Wl,-r"); // NOTE: we need -nostdlib, otherwise, we get the following error: // /usr/bin/ld: cannot find -lgcc_s: No such file or directory context.add_driver_option("-nostdlib"); - // NOTE: without -fuse-linker-plugin, we get the following error: - // lto1: internal compiler error: decompressed stream: Destination buffer is too small - context.add_driver_option("-fuse-linker-plugin"); // NOTE: this doesn't actually generate an executable. With the above flags, it combines the .o files together in another .o. 
context.compile_to_file( diff --git a/compiler/rustc_codegen_gcc/src/base.rs b/compiler/rustc_codegen_gcc/src/base.rs index 2a2d5741d13..be149ffe5a1 100644 --- a/compiler/rustc_codegen_gcc/src/base.rs +++ b/compiler/rustc_codegen_gcc/src/base.rs @@ -1,8 +1,9 @@ use std::collections::HashSet; use std::env; +use std::sync::Arc; use std::time::Instant; -use gccjit::{FunctionType, GlobalKind}; +use gccjit::{CType, FunctionType, GlobalKind}; use rustc_codegen_ssa::base::maybe_create_entry_wrapper; use rustc_codegen_ssa::mono_item::MonoItemExt; use rustc_codegen_ssa::traits::DebugInfoMethods; @@ -18,8 +19,8 @@ use rustc_target::spec::PanicStrategy; use crate::builder::Builder; use crate::context::CodegenCx; -use crate::GccContext; use crate::{gcc_util, new_context, LockedTargetInfo}; +use crate::{GccContext, SyncContext}; #[cfg(feature = "master")] pub fn visibility_to_gcc(linkage: Visibility) -> gccjit::Visibility { @@ -135,7 +136,7 @@ pub fn compile_codegen_unit( let target_cpu = gcc_util::target_cpu(tcx.sess); if target_cpu != "generic" { - context.add_command_line_option(&format!("-march={}", target_cpu)); + context.add_command_line_option(format!("-march={}", target_cpu)); } if tcx @@ -181,7 +182,24 @@ pub fn compile_codegen_unit( context.set_allow_unreachable_blocks(true); { - let cx = CodegenCx::new(&context, cgu, tcx, target_info.supports_128bit_int()); + // TODO: to make it less error-prone (calling get_target_info() will add the flag + // -fsyntax-only), forbid the compilation when get_target_info() is called on a + // context. + let f16_type_supported = target_info.supports_target_dependent_type(CType::Float16); + let f32_type_supported = target_info.supports_target_dependent_type(CType::Float32); + let f64_type_supported = target_info.supports_target_dependent_type(CType::Float64); + let f128_type_supported = target_info.supports_target_dependent_type(CType::Float128); + // TODO: improve this to avoid passing that many arguments. 
+ let cx = CodegenCx::new( + &context, + cgu, + tcx, + target_info.supports_128bit_int(), + f16_type_supported, + f32_type_supported, + f64_type_supported, + f128_type_supported, + ); let mono_items = cgu.items_in_deterministic_order(tcx); for &(mono_item, data) in &mono_items { @@ -205,7 +223,11 @@ pub fn compile_codegen_unit( ModuleCodegen { name: cgu_name.to_string(), - module_llvm: GccContext { context, should_combine_object_files: false, temp_dir: None }, + module_llvm: GccContext { + context: Arc::new(SyncContext::new(context)), + should_combine_object_files: false, + temp_dir: None, + }, kind: ModuleKind::Regular, } } diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs index 4a3b6f678c4..b7f62fd09d2 100644 --- a/compiler/rustc_codegen_gcc/src/builder.rs +++ b/compiler/rustc_codegen_gcc/src/builder.rs @@ -25,7 +25,7 @@ use rustc_middle::ty::layout::{ FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasParamEnv, HasTyCtxt, LayoutError, LayoutOfHelpers, TyAndLayout, }; -use rustc_middle::ty::{ParamEnv, Ty, TyCtxt, Instance}; +use rustc_middle::ty::{Instance, ParamEnv, Ty, TyCtxt}; use rustc_span::def_id::DefId; use rustc_span::Span; use rustc_target::abi::{ @@ -68,7 +68,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { src: RValue<'gcc>, order: AtomicOrdering, ) -> RValue<'gcc> { - let size = src.get_type().get_size(); + let size = get_maybe_pointer_size(src); let func = self.current_func(); @@ -138,7 +138,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { failure_order: AtomicOrdering, weak: bool, ) -> RValue<'gcc> { - let size = src.get_type().get_size(); + let size = get_maybe_pointer_size(src); let compare_exchange = self.context.get_builtin_function(&format!("__atomic_compare_exchange_{}", size)); let order = self.context.new_rvalue_from_int(self.i32_type, order.to_gcc()); @@ -153,7 +153,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { // NOTE: not sure why, but we have the wrong type here. let int_type = compare_exchange.get_param(2).to_rvalue().get_type(); - let src = self.context.new_cast(self.location, src, int_type); + let src = self.context.new_bitcast(self.location, src, int_type); self.context.new_call( self.location, compare_exchange, @@ -190,8 +190,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let casted_args: Vec<_> = param_types .into_iter() .zip(args.iter()) - .enumerate() - .map(|(_i, (expected_ty, &actual_val))| { + .map(|(expected_ty, &actual_val)| { let actual_ty = actual_val.get_type(); if expected_ty != actual_ty { self.bitcast(actual_val, expected_ty) @@ -225,7 +224,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let mut on_stack_param_indices = FxHashSet::default(); if let Some(indices) = self.on_stack_params.borrow().get(&gcc_func) { - on_stack_param_indices = indices.clone(); + on_stack_param_indices.clone_from(indices); } if all_args_match { @@ -253,11 +252,26 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { { self.context.new_cast(self.location, actual_val, expected_ty) } else if on_stack_param_indices.contains(&index) { - actual_val.dereference(self.location).to_rvalue() + let ty = actual_val.get_type(); + // It's possible that the value behind the pointer is actually not exactly + // the expected type, so to go around that, we add a cast before + // dereferencing the value. 
+ if let Some(pointee_val) = ty.get_pointee() + && pointee_val != expected_ty + { + let new_val = self.context.new_cast( + self.location, + actual_val, + expected_ty.make_pointer(), + ); + new_val.dereference(self.location).to_rvalue() + } else { + actual_val.dereference(self.location).to_rvalue() + } } else { assert!( - !((actual_ty.is_vector() && !expected_ty.is_vector()) - || (!actual_ty.is_vector() && expected_ty.is_vector())), + (!expected_ty.is_vector() || actual_ty.is_vector()) + && (expected_ty.is_vector() || !actual_ty.is_vector()), "{:?} ({}) -> {:?} ({}), index: {:?}[{}]", actual_ty, actual_ty.is_vector(), @@ -277,8 +291,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { .collect(); // NOTE: to take into account variadic functions. - for i in casted_args.len()..args.len() { - casted_args.push(args[i]); + for arg in args.iter().skip(casted_args.len()) { + casted_args.push(*arg); } Cow::Owned(casted_args) @@ -353,7 +367,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let function_address_names = self.function_address_names.borrow(); let original_function_name = function_address_names.get(&func_ptr); llvm::adjust_intrinsic_arguments( - &self, + self, gcc_func, args.into(), &func_name, @@ -361,7 +375,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { ) }; let args_adjusted = args.len() != previous_arg_count; - let args = self.check_ptr_call("call", func_ptr, &*args); + let args = self.check_ptr_call("call", func_ptr, &args); // gccjit requires to use the result of functions, even when it's not used. // That's why we assign the result to a local or call add_eval(). @@ -373,7 +387,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { unsafe { RETURN_VALUE_COUNT += 1 }; let return_value = self.cx.context.new_call_through_ptr(self.location, func_ptr, &args); let return_value = llvm::adjust_intrinsic_return_value( - &self, + self, return_value, &func_name, &args, @@ -441,7 +455,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { self.block.add_assignment( self.location, result, - self.cx.context.new_call(self.location, func, &args), + self.cx.context.new_call(self.location, func, args), ); result.to_rvalue() } @@ -596,7 +610,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { ) -> RValue<'gcc> { let try_block = self.current_func().new_block("try"); - let current_block = self.block.clone(); + let current_block = self.block; self.block = try_block; let call = self.call(typ, fn_attrs, None, func, args, None, instance); // TODO(antoyo): use funclet here? self.block = current_block; @@ -630,8 +644,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>, + instance: Option<Instance<'tcx>>, ) -> RValue<'gcc> { - let call_site = self.call(typ, fn_attrs, None, func, args, None); + let call_site = self.call(typ, fn_attrs, None, func, args, None, instance); let condition = self.context.new_rvalue_from_int(self.bool_type, 1); self.llbb().end_with_conditional(self.location, condition, then, catch); if let Some(_fn_abi) = fn_abi { @@ -749,6 +764,24 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { // FIXME(antoyo): this seems to produce the wrong result. 
return self.context.new_call(self.location, fmodf, &[a, b]); } + + #[cfg(feature = "master")] + match self.cx.type_kind(a_type) { + TypeKind::Half | TypeKind::Float => { + let fmodf = self.context.get_builtin_function("fmodf"); + return self.context.new_call(self.location, fmodf, &[a, b]); + } + TypeKind::Double => { + let fmod = self.context.get_builtin_function("fmod"); + return self.context.new_call(self.location, fmod, &[a, b]); + } + TypeKind::FP128 => { + let fmodl = self.context.get_builtin_function("fmodl"); + return self.context.new_call(self.location, fmodl, &[a, b]); + } + _ => (), + } + if let Some(vector_type) = a_type_unqualified.dyncast_vector() { assert_eq!(a_type_unqualified, b.get_type().unqualified()); @@ -903,11 +936,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { // TODO(antoyo): It might be better to return a LValue, but fixing the rustc API is non-trivial. self.stack_var_count.set(self.stack_var_count.get() + 1); self.current_func() - .new_local( - self.location, - ty, - &format!("stack_var_{}", self.stack_var_count.get()), - ) + .new_local(self.location, ty, &format!("stack_var_{}", self.stack_var_count.get())) .get_address(self.location) } @@ -993,7 +1022,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { } } - let val = if let Some(_) = place.val.llextra { + let val = if place.val.llextra.is_some() { // FIXME: Merge with the `else` below? OperandValue::Ref(place.val) } else if place.layout.is_gcc_immediate() { @@ -1125,7 +1154,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { // the following cast is required to avoid this error: // gcc_jit_context_new_call: mismatching types for argument 2 of function "__atomic_store_4": assignment to param arg1 (type: int) from loadedValue3577 (type: unsigned int __attribute__((aligned(4)))) let int_type = atomic_store.get_param(1).to_rvalue().get_type(); - let value = self.context.new_cast(self.location, value, int_type); + let value = self.context.new_bitcast(self.location, value, int_type); self.llbb().add_eval( self.location, self.context.new_call(self.location, atomic_store, &[ptr, value, ordering]), @@ -1172,7 +1201,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { // NOTE: due to opaque pointers now being used, we need to cast here. let ptr = self.context.new_cast(self.location, ptr, typ.make_pointer()); // NOTE: array indexing is always considered in bounds in GCC (TODO(antoyo): to be verified). - let mut indices = indices.into_iter(); + let mut indices = indices.iter(); let index = indices.next().expect("first index in inbounds_gep"); let mut result = self.context.new_array_access(self.location, ptr, *index); for index in indices { @@ -1589,7 +1618,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { src: RValue<'gcc>, order: AtomicOrdering, ) -> RValue<'gcc> { - let size = src.get_type().get_size(); + let size = get_maybe_pointer_size(src); let name = match op { AtomicRmwBinOp::AtomicXchg => format!("__atomic_exchange_{}", size), AtomicRmwBinOp::AtomicAdd => format!("__atomic_fetch_add_{}", size), @@ -1620,7 +1649,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { let dst = self.context.new_cast(self.location, dst, volatile_void_ptr_type); // FIXME(antoyo): not sure why, but we have the wrong type here. 
let new_src_type = atomic_function.get_param(1).to_rvalue().get_type(); - let src = self.context.new_cast(self.location, src, new_src_type); + let src = self.context.new_bitcast(self.location, src, new_src_type); let res = self.context.new_call(self.location, atomic_function, &[dst, src, order]); self.context.new_cast(self.location, res, src.get_type()) } @@ -1661,7 +1690,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { _instance: Option<Instance<'tcx>>, ) -> RValue<'gcc> { // FIXME(antoyo): remove when having a proper API. - let gcc_func = unsafe { std::mem::transmute(func) }; + let gcc_func = unsafe { std::mem::transmute::<RValue<'gcc>, Function<'gcc>>(func) }; let call = if self.functions.borrow().values().any(|value| *value == gcc_func) { self.function_call(func, args, funclet) } else { @@ -1676,11 +1705,6 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { fn zext(&mut self, value: RValue<'gcc>, dest_typ: Type<'gcc>) -> RValue<'gcc> { // FIXME(antoyo): this does not zero-extend. - if value.get_type().is_bool() && dest_typ.is_i8(&self.cx) { - // FIXME(antoyo): hack because base::from_immediate converts i1 to i8. - // Fix the code in codegen_ssa::base::from_immediate. - return value; - } self.gcc_int_cast(value, dest_typ) } @@ -2049,7 +2073,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { self.context.new_rvalue_from_vector(self.location, mask_type, &vector_elements); let shifted = self.context.new_rvalue_vector_perm(self.location, res, res, mask); shift *= 2; - res = op(res, shifted, &self.context); + res = op(res, shifted, self.context); } self.context .new_vector_access(self.location, res, self.context.new_rvalue_zero(self.int_type)) @@ -2065,7 +2089,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } pub fn vector_reduce_op(&mut self, src: RValue<'gcc>, op: BinaryOp) -> RValue<'gcc> { - let loc = self.location.clone(); + let loc = self.location; self.vector_reduce(src, |a, b, context| context.new_binary_op(loc, op, a.get_type(), a, b)) } @@ -2082,7 +2106,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type"); let element_count = vector_type.get_num_units(); (0..element_count) - .into_iter() .map(|i| { self.context .new_vector_access( @@ -2113,7 +2136,6 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let vector_type = src.get_type().unqualified().dyncast_vector().expect("vector type"); let element_count = vector_type.get_num_units(); (0..element_count) - .into_iter() .map(|i| { self.context .new_vector_access( @@ -2133,7 +2155,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { // Inspired by Hacker's Delight min implementation. pub fn vector_reduce_min(&mut self, src: RValue<'gcc>) -> RValue<'gcc> { - let loc = self.location.clone(); + let loc = self.location; self.vector_reduce(src, |a, b, context| { let differences_or_zeros = difference_or_zero(loc, a, b, context); context.new_binary_op(loc, BinaryOp::Plus, b.get_type(), b, differences_or_zeros) @@ -2142,7 +2164,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { // Inspired by Hacker's Delight max implementation. 
pub fn vector_reduce_max(&mut self, src: RValue<'gcc>) -> RValue<'gcc> { - let loc = self.location.clone(); + let loc = self.location; self.vector_reduce(src, |a, b, context| { let differences_or_zeros = difference_or_zero(loc, a, b, context); context.new_binary_op(loc, BinaryOp::Minus, a.get_type(), a, differences_or_zeros) @@ -2337,7 +2359,13 @@ impl<'tcx> HasParamEnv<'tcx> for Builder<'_, '_, 'tcx> { impl<'tcx> HasTargetSpec for Builder<'_, '_, 'tcx> { fn target_spec(&self) -> &Target { - &self.cx.target_spec() + self.cx.target_spec() + } +} + +impl<'tcx> HasWasmCAbiOpt for Builder<'_, '_, 'tcx> { + fn wasm_c_abi_opt(&self) -> WasmCAbi { + self.cx.wasm_c_abi_opt() } } @@ -2422,3 +2450,19 @@ impl ToGccOrdering for AtomicOrdering { ordering as i32 } } + +// Needed because gcc 12 `get_size()` doesn't work on pointers. +#[cfg(feature = "master")] +fn get_maybe_pointer_size(value: RValue<'_>) -> u32 { + value.get_type().get_size() +} + +#[cfg(not(feature = "master"))] +fn get_maybe_pointer_size(value: RValue<'_>) -> u32 { + let type_ = value.get_type(); + if type_.get_pointee().is_some() { + std::mem::size_of::<*const ()>() as _ + } else { + type_.get_size() + } +} diff --git a/compiler/rustc_codegen_gcc/src/callee.rs b/compiler/rustc_codegen_gcc/src/callee.rs index 84f49b6856d..9ad2e90122f 100644 --- a/compiler/rustc_codegen_gcc/src/callee.rs +++ b/compiler/rustc_codegen_gcc/src/callee.rs @@ -28,7 +28,7 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>) let fn_abi = cx.fn_abi_of_instance(instance, ty::List::empty()); - let func = if let Some(_func) = cx.get_declared_value(&sym) { + let func = if let Some(_func) = cx.get_declared_value(sym) { // FIXME(antoyo): we never reach this because get_declared_value only returns global variables // and here we try to get a function. unreachable!(); @@ -68,7 +68,7 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>) }*/ } else { cx.linkage.set(FunctionType::Extern); - let func = cx.declare_fn(&sym, &fn_abi); + let func = cx.declare_fn(sym, fn_abi); attributes::from_fn_attrs(cx, func, instance); diff --git a/compiler/rustc_codegen_gcc/src/common.rs b/compiler/rustc_codegen_gcc/src/common.rs index fa8a1ec037c..19333689aaa 100644 --- a/compiler/rustc_codegen_gcc/src/common.rs +++ b/compiler/rustc_codegen_gcc/src/common.rs @@ -21,7 +21,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { fn global_string(&self, string: &str) -> LValue<'gcc> { // TODO(antoyo): handle non-null-terminated strings. 
- let string = self.context.new_string_literal(&*string); + let string = self.context.new_string_literal(string); let sym = self.generate_local_symbol_name("str"); let global = self.declare_private_global(&sym, self.val_ty(string)); global.global_set_initializer_rvalue(string); @@ -187,7 +187,8 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> { return self .context .new_rvalue_from_double(ty, f32::from_bits(data as u32) as f64); - } else if ty == self.double_type { + } + if ty == self.double_type { return self.context.new_rvalue_from_double(ty, f64::from_bits(data as u64)); } @@ -297,7 +298,7 @@ impl<'gcc, 'tcx> SignType<'gcc, 'tcx> for Type<'gcc> { } else if self.is_ulonglong(cx) { cx.longlong_type } else { - self.clone() + *self } } @@ -323,7 +324,7 @@ impl<'gcc, 'tcx> SignType<'gcc, 'tcx> for Type<'gcc> { } else if self.is_longlong(cx) { cx.ulonglong_type } else { - self.clone() + *self } } } @@ -436,7 +437,7 @@ impl<'gcc, 'tcx> TypeReflection<'gcc, 'tcx> for Type<'gcc> { } fn is_vector(&self) -> bool { - let mut typ = self.clone(); + let mut typ = *self; loop { if typ.dyncast_vector().is_some() { return true; diff --git a/compiler/rustc_codegen_gcc/src/consts.rs b/compiler/rustc_codegen_gcc/src/consts.rs index 3d73a60b255..ba7e08e33ef 100644 --- a/compiler/rustc_codegen_gcc/src/consts.rs +++ b/compiler/rustc_codegen_gcc/src/consts.rs @@ -1,15 +1,16 @@ #[cfg(feature = "master")] use gccjit::{FnAttribute, VarAttribute, Visibility}; -use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue}; -use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, DerivedTypeMethods, StaticMethods}; +use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue, Type}; +use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, StaticMethods}; +use rustc_hir::def::DefKind; +use rustc_middle::bug; use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs}; use rustc_middle::mir::interpret::{ self, read_target_uint, ConstAllocation, ErrorHandled, Scalar as InterpScalar, }; -use rustc_middle::mir::mono::MonoItem; use rustc_middle::span_bug; use rustc_middle::ty::layout::LayoutOf; -use rustc_middle::ty::{self, Instance, Ty}; +use rustc_middle::ty::{self, Instance}; use rustc_span::def_id::DefId; use rustc_target::abi::{self, Align, HasDataLayout, Primitive, Size, WrappingRange}; @@ -63,16 +64,15 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> { global_value } + #[cfg_attr(not(feature = "master"), allow(unused_mut))] fn codegen_static(&self, def_id: DefId) { let attrs = self.tcx.codegen_fn_attrs(def_id); - let value = match codegen_static_initializer(&self, def_id) { - Ok((value, _)) => value, + let Ok((value, alloc)) = codegen_static_initializer(self, def_id) else { // Error has already been reported - Err(_) => return, + return; }; - - let global = self.get_static(def_id); + let alloc = alloc.inner(); // boolean SSA values are i1, but they have to be stored in i8 slots, // otherwise some LLVM optimization passes don't work as expected @@ -81,23 +81,25 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> { unimplemented!(); }; - let instance = Instance::mono(self.tcx, def_id); - let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all()); - let gcc_type = self.layout_of(ty).gcc_type(self); + let is_thread_local = attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL); + let global = self.get_static_inner(def_id, val_llty); - set_global_alignment(self, global, self.align_of(ty)); + #[cfg(feature = "master")] + if global.to_rvalue().get_type() 
!= val_llty { + global.to_rvalue().set_type(val_llty); + } + set_global_alignment(self, global, alloc.align); - let value = self.bitcast_if_needed(value, gcc_type); global.global_set_initializer_rvalue(value); // As an optimization, all shared statics which do not have interior // mutability are placed into read-only memory. - if !self.tcx.static_mutability(def_id).unwrap().is_mut() && self.type_is_freeze(ty) { + if alloc.mutability.is_not() { #[cfg(feature = "master")] global.global_set_readonly(); } - if attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) { + if is_thread_local { // Do not allow LLVM to change the alignment of a TLS on macOS. // // By default a global's alignment can be freely increased. @@ -205,35 +207,49 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { pub fn get_static(&self, def_id: DefId) -> LValue<'gcc> { let instance = Instance::mono(self.tcx, def_id); - let fn_attrs = self.tcx.codegen_fn_attrs(def_id); + let DefKind::Static { nested, .. } = self.tcx.def_kind(def_id) else { bug!() }; + // Nested statics do not have a type, so pick a random type and let `define_static` figure out + // the gcc type from the actual evaluated initializer. + let gcc_type = if nested { + self.type_i8() + } else { + let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all()); + self.layout_of(ty).gcc_type(self) + }; + + self.get_static_inner(def_id, gcc_type) + } + + pub(crate) fn get_static_inner(&self, def_id: DefId, gcc_type: Type<'gcc>) -> LValue<'gcc> { + let instance = Instance::mono(self.tcx, def_id); if let Some(&global) = self.instances.borrow().get(&instance) { + trace!("used cached value"); return global; } - let defined_in_current_codegen_unit = - self.codegen_unit.items().contains_key(&MonoItem::Static(def_id)); - assert!( - !defined_in_current_codegen_unit, - "consts::get_static() should always hit the cache for \ - statics defined in the same CGU, but did not for `{:?}`", - def_id - ); - - let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all()); + // FIXME: Once we stop removing globals in `codegen_static`, we can uncomment this code. 
+ // let defined_in_current_codegen_unit = + // self.codegen_unit.items().contains_key(&MonoItem::Static(def_id)); + // assert!( + // !defined_in_current_codegen_unit, + // "consts::get_static() should always hit the cache for \ + // statics defined in the same CGU, but did not for `{:?}`", + // def_id + // ); let sym = self.tcx.symbol_name(instance).name; + let fn_attrs = self.tcx.codegen_fn_attrs(def_id); let global = if def_id.is_local() && !self.tcx.is_foreign_item(def_id) { - let llty = self.layout_of(ty).gcc_type(self); if let Some(global) = self.get_declared_value(sym) { - if self.val_ty(global) != self.type_ptr_to(llty) { + if self.val_ty(global) != self.type_ptr_to(gcc_type) { span_bug!(self.tcx.def_span(def_id), "Conflicting types for static"); } } let is_tls = fn_attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL); let global = self.declare_global( - &sym, - llty, + sym, + gcc_type, GlobalKind::Exported, is_tls, fn_attrs.link_section, @@ -246,7 +262,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { global } else { - check_and_apply_linkage(&self, &fn_attrs, ty, sym) + check_and_apply_linkage(self, fn_attrs, gcc_type, sym) }; if !def_id.is_local() { @@ -360,18 +376,14 @@ fn codegen_static_initializer<'gcc, 'tcx>( fn check_and_apply_linkage<'gcc, 'tcx>( cx: &CodegenCx<'gcc, 'tcx>, attrs: &CodegenFnAttrs, - ty: Ty<'tcx>, + gcc_type: Type<'gcc>, sym: &str, ) -> LValue<'gcc> { let is_tls = attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL); - let gcc_type = cx.layout_of(ty).gcc_type(cx); if let Some(linkage) = attrs.import_linkage { // Declare a symbol `foo` with the desired linkage. - let global1 = cx.declare_global_with_linkage( - &sym, - cx.type_i8(), - base::global_linkage_to_gcc(linkage), - ); + let global1 = + cx.declare_global_with_linkage(sym, cx.type_i8(), base::global_linkage_to_gcc(linkage)); // Declare an internal global `extern_with_linkage_foo` which // is initialized with the address of `foo`. If `foo` is @@ -380,7 +392,7 @@ fn check_and_apply_linkage<'gcc, 'tcx>( // `extern_with_linkage_foo` will instead be initialized to // zero. let mut real_name = "_rust_extern_with_linkage_".to_string(); - real_name.push_str(&sym); + real_name.push_str(sym); let global2 = cx.define_global(&real_name, gcc_type, is_tls, attrs.link_section); // TODO(antoyo): set linkage. let value = cx.const_ptrcast(global1.get_address(None), gcc_type); @@ -397,6 +409,6 @@ fn check_and_apply_linkage<'gcc, 'tcx>( // don't do this then linker errors can be generated where the linker // complains that one object files has a thread local version of the // symbol and another one doesn't. 
- cx.declare_global(&sym, gcc_type, GlobalKind::Imported, is_tls, attrs.link_section) + cx.declare_global(sym, gcc_type, GlobalKind::Imported, is_tls, attrs.link_section) } } diff --git a/compiler/rustc_codegen_gcc/src/context.rs b/compiler/rustc_codegen_gcc/src/context.rs index 1d689c9ac0e..86a5000a723 100644 --- a/compiler/rustc_codegen_gcc/src/context.rs +++ b/compiler/rustc_codegen_gcc/src/context.rs @@ -68,6 +68,10 @@ pub struct CodegenCx<'gcc, 'tcx> { pub sizet_type: Type<'gcc>, pub supports_128bit_integers: bool, + pub supports_f16_type: bool, + pub supports_f32_type: bool, + pub supports_f64_type: bool, + pub supports_f128_type: bool, pub float_type: Type<'gcc>, pub double_type: Type<'gcc>, @@ -110,7 +114,7 @@ pub struct CodegenCx<'gcc, 'tcx> { local_gen_sym_counter: Cell<usize>, eh_personality: Cell<Option<RValue<'gcc>>>, - #[cfg(feature="master")] + #[cfg(feature = "master")] pub rust_try_fn: Cell<Option<(Type<'gcc>, Function<'gcc>)>>, pub pointee_infos: RefCell<FxHashMap<(Ty<'tcx>, Size), Option<PointeeInfo>>>, @@ -122,16 +126,21 @@ pub struct CodegenCx<'gcc, 'tcx> { /// FIXME(antoyo): fix the rustc API to avoid having this hack. pub structs_as_pointer: RefCell<FxHashSet<RValue<'gcc>>>, - #[cfg(feature="master")] + #[cfg(feature = "master")] pub cleanup_blocks: RefCell<FxHashSet<Block<'gcc>>>, } impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { + #[allow(clippy::too_many_arguments)] pub fn new( context: &'gcc Context<'gcc>, codegen_unit: &'tcx CodegenUnit<'tcx>, tcx: TyCtxt<'tcx>, supports_128bit_integers: bool, + supports_f16_type: bool, + supports_f32_type: bool, + supports_f64_type: bool, + supports_f128_type: bool, ) -> Self { let create_type = |ctype, rust_type| { let layout = tcx.layout_of(ParamEnv::reveal_all().and(rust_type)).unwrap(); @@ -304,6 +313,10 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { sizet_type, supports_128bit_integers, + supports_f16_type, + supports_f32_type, + supports_f64_type, + supports_f128_type, float_type, double_type, @@ -324,11 +337,11 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { struct_types: Default::default(), local_gen_sym_counter: Cell::new(0), eh_personality: Cell::new(None), - #[cfg(feature="master")] + #[cfg(feature = "master")] rust_try_fn: Cell::new(None), pointee_infos: Default::default(), structs_as_pointer: Default::default(), - #[cfg(feature="master")] + #[cfg(feature = "master")] cleanup_blocks: Default::default(), }; // TODO(antoyo): instead of doing this, add SsizeT to libgccjit. 
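The new `supports_f16_type`/`supports_f32_type`/`supports_f64_type`/`supports_f128_type` fields carry float-width capabilities probed from the target into the codegen context; the `type_.rs` hunks later in this patch consult them when a type is requested. A minimal, self-contained sketch of that pattern follows; the names (`FloatSupport`, `type_name`) are illustrative and not part of the backend's API.

```rust
// Sketch only: capability flags probed once, then consulted per request.
#[derive(Default)]
struct FloatSupport {
    f16: bool,
    f32: bool,
    f64: bool,
    f128: bool,
}

impl FloatSupport {
    fn type_name(&self, bits: u32) -> Option<&'static str> {
        match bits {
            // 16- and 128-bit floats are only available when the target supports them.
            16 if self.f16 => Some("_Float16"),
            128 if self.f128 => Some("_Float128"),
            // 32- and 64-bit floats always have a native fallback.
            32 => Some(if self.f32 { "_Float32" } else { "float" }),
            64 => Some(if self.f64 { "_Float64" } else { "double" }),
            _ => None,
        }
    }
}

fn main() {
    let support = FloatSupport { f16: true, ..Default::default() };
    assert_eq!(support.type_name(16), Some("_Float16"));
    assert_eq!(support.type_name(64), Some("double")); // falls back to the native type
    assert_eq!(support.type_name(128), None); // no Float128 support probed
}
```

The real backend returns gccjit types rather than strings and reports `bug!("unsupported float width ...")` for the 16- and 128-bit cases when the flag is off.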
@@ -385,7 +398,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { } pub fn sess(&self) -> &'tcx Session { - &self.tcx.sess + self.tcx.sess } pub fn bitcast_if_needed( @@ -432,7 +445,9 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> { let func_name = self.tcx.symbol_name(instance).name; let func = if self.intrinsics.borrow().contains_key(func_name) { - self.intrinsics.borrow()[func_name].clone() + self.intrinsics.borrow()[func_name] + } else if let Some(variable) = self.get_declared_value(func_name) { + return variable; } else { get_fn(self, instance) }; @@ -485,7 +500,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> { let symbol_name = tcx.symbol_name(instance).name; let fn_abi = self.fn_abi_of_instance(instance, ty::List::empty()); self.linkage.set(FunctionType::Extern); - let func = self.declare_fn(symbol_name, &fn_abi); + let func = self.declare_fn(symbol_name, fn_abi); let func: RValue<'gcc> = unsafe { std::mem::transmute(func) }; func } @@ -496,7 +511,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> { "rust_eh_personality" }; let func = self.declare_func(name, self.type_i32(), &[], true); - unsafe { std::mem::transmute(func) } + unsafe { std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(func) } } }; // TODO(antoyo): apply target cpu attributes. @@ -505,7 +520,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> { } fn sess(&self) -> &Session { - &self.tcx.sess + self.tcx.sess } fn codegen_unit(&self) -> &'tcx CodegenUnit<'tcx> { @@ -522,7 +537,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> { fn declare_c_main(&self, fn_type: Self::Type) -> Option<Self::Function> { let entry_name = self.sess().target.entry_name.as_ref(); - if self.get_declared_value(entry_name).is_none() { + if !self.functions.borrow().contains_key(entry_name) { Some(self.declare_entry_fn(entry_name, fn_type, ())) } else { // If the symbol already exists, it is an error: for example, the user wrote @@ -614,7 +629,7 @@ impl<'b, 'tcx> CodegenCx<'b, 'tcx> { // user defined names let mut name = String::with_capacity(prefix.len() + 6); name.push_str(prefix); - name.push_str("."); + name.push('.'); name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY)); name } diff --git a/compiler/rustc_codegen_gcc/src/debuginfo.rs b/compiler/rustc_codegen_gcc/src/debuginfo.rs index 1d0a6d9f09b..3d9ea278a63 100644 --- a/compiler/rustc_codegen_gcc/src/debuginfo.rs +++ b/compiler/rustc_codegen_gcc/src/debuginfo.rs @@ -90,7 +90,7 @@ fn compute_mir_scopes<'gcc, 'tcx>( /// FIXME(tempdragon/?): Add Scope Support Here. fn make_mir_scope<'gcc, 'tcx>( cx: &CodegenCx<'gcc, 'tcx>, - instance: Instance<'tcx>, + _instance: Instance<'tcx>, mir: &Body<'tcx>, variables: &Option<BitSet<SourceScope>>, debug_context: &mut FunctionDebugContext<'tcx, (), Location<'gcc>>, @@ -103,7 +103,7 @@ fn make_mir_scope<'gcc, 'tcx>( let scope_data = &mir.source_scopes[scope]; let parent_scope = if let Some(parent) = scope_data.parent_scope { - make_mir_scope(cx, instance, mir, variables, debug_context, instantiated, parent); + make_mir_scope(cx, _instance, mir, variables, debug_context, instantiated, parent); debug_context.scopes[parent] } else { // The root is the function itself. 
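For orientation on the `make_mir_scope` hunks: each scope instantiates its parent before itself and skips scopes that were already handled. A rough, runnable sketch of that recursion, using plain `usize` indices and a `HashSet` in place of rustc's `SourceScope` and `BitSet` (an assumed simplification, not the backend's types):

```rust
use std::collections::HashSet;

#[derive(Clone, Copy)]
struct ScopeData {
    parent: Option<usize>,
}

fn make_scope(idx: usize, scopes: &[ScopeData], instantiated: &mut HashSet<usize>) {
    if instantiated.contains(&idx) {
        return; // already built
    }
    if let Some(parent) = scopes[idx].parent {
        // The parent must exist first, hence the recursive call, mirroring the
        // `make_mir_scope(cx, _instance, mir, variables, ..., parent)` call above.
        make_scope(parent, scopes, instantiated);
    }
    // A real implementation would now derive this scope's debug location from the
    // parent's; this sketch only records that the scope was instantiated.
    instantiated.insert(idx);
}

fn main() {
    // Scope 0 is the function root; scope 1 is nested inside it.
    let scopes = [ScopeData { parent: None }, ScopeData { parent: Some(0) }];
    let mut instantiated = HashSet::new();
    make_scope(1, &scopes, &mut instantiated);
    assert!(instantiated.contains(&0) && instantiated.contains(&1));
}
```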
@@ -117,7 +117,7 @@ fn make_mir_scope<'gcc, 'tcx>( return; }; - if let Some(vars) = variables { + if let Some(ref vars) = *variables { if !vars.contains(scope) && scope_data.inlined.is_none() { // Do not create a DIScope if there are no variables defined in this // MIR `SourceScope`, and it's not `inlined`, to avoid debuginfo bloat. @@ -135,8 +135,14 @@ fn make_mir_scope<'gcc, 'tcx>( let inlined_at = scope_data.inlined.map(|(_, callsite_span)| { // FIXME(eddyb) this doesn't account for the macro-related // `Span` fixups that `rustc_codegen_ssa::mir::debuginfo` does. - let callsite_scope = parent_scope.adjust_dbg_scope_for_span(cx, callsite_span); - cx.dbg_loc(callsite_scope, parent_scope.inlined_at, callsite_span) + + // TODO(tempdragon): Add scope support and then revert to cg_llvm version of this closure + // NOTE: These variables passed () here. + // Changed to comply to clippy. + + /* let callsite_scope = */ + parent_scope.adjust_dbg_scope_for_span(cx, callsite_span); + cx.dbg_loc(/* callsite_scope */ (), parent_scope.inlined_at, callsite_span) }); let p_inlined_at = parent_scope.inlined_at; // TODO(tempdragon): dbg_scope: Add support for scope extension here. @@ -224,7 +230,7 @@ impl<'gcc, 'tcx> DebugInfoMethods<'tcx> for CodegenCx<'gcc, 'tcx> { file_end_pos: BytePos(0), }; let mut fn_debug_context = FunctionDebugContext { - scopes: IndexVec::from_elem(empty_scope, &mir.source_scopes.as_slice()), + scopes: IndexVec::from_elem(empty_scope, mir.source_scopes.as_slice()), inlined_function_scopes: Default::default(), }; @@ -273,16 +279,19 @@ impl<'gcc, 'tcx> DebugInfoMethods<'tcx> for CodegenCx<'gcc, 'tcx> { ) -> Self::DILocation { let pos = span.lo(); let DebugLoc { file, line, col } = self.lookup_debug_loc(pos); - let loc = match &file.name { - rustc_span::FileName::Real(name) => match name { - rustc_span::RealFileName::LocalPath(name) => { + let loc = match file.name { + rustc_span::FileName::Real(ref name) => match *name { + rustc_span::RealFileName::LocalPath(ref name) => { if let Some(name) = name.to_str() { self.context.new_location(name, line as i32, col as i32) } else { Location::null() } } - rustc_span::RealFileName::Remapped { local_path, virtual_name: _ } => { + rustc_span::RealFileName::Remapped { + ref local_path, + virtual_name: ref _unused, + } => { if let Some(name) = local_path.as_ref() { if let Some(name) = name.to_str() { self.context.new_location(name, line as i32, col as i32) diff --git a/compiler/rustc_codegen_gcc/src/declare.rs b/compiler/rustc_codegen_gcc/src/declare.rs index db6edbab12d..a2b158ee0a7 100644 --- a/compiler/rustc_codegen_gcc/src/declare.rs +++ b/compiler/rustc_codegen_gcc/src/declare.rs @@ -35,7 +35,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { pub fn declare_unnamed_global(&self, ty: Type<'gcc>) -> LValue<'gcc> { let name = self.generate_local_symbol_name("global"); - self.context.new_global(None, GlobalKind::Internal, ty, &name) + self.context.new_global(None, GlobalKind::Internal, ty, name) } pub fn declare_global_with_linkage( @@ -176,16 +176,14 @@ fn declare_raw_fn<'gcc>( cx.functions.borrow()[name] } else { let params: Vec<_> = param_types - .into_iter() + .iter() .enumerate() - .map(|(index, param)| { - cx.context.new_parameter(None, *param, &format!("param{}", index)) - }) // TODO(antoyo): set name. + .map(|(index, param)| cx.context.new_parameter(None, *param, format!("param{}", index))) // TODO(antoyo): set name. 
.collect(); #[cfg(not(feature = "master"))] - let name = mangle_name(name); + let name = &mangle_name(name); let func = - cx.context.new_function(None, cx.linkage.get(), return_type, ¶ms, &name, variadic); + cx.context.new_function(None, cx.linkage.get(), return_type, ¶ms, name, variadic); cx.functions.borrow_mut().insert(name.to_string(), func); #[cfg(feature = "master")] @@ -200,10 +198,10 @@ fn declare_raw_fn<'gcc>( // create a wrapper function that calls rust_eh_personality. let params: Vec<_> = param_types - .into_iter() + .iter() .enumerate() .map(|(index, param)| { - cx.context.new_parameter(None, *param, &format!("param{}", index)) + cx.context.new_parameter(None, *param, format!("param{}", index)) }) // TODO(antoyo): set name. .collect(); let gcc_func = cx.context.new_function( diff --git a/compiler/rustc_codegen_gcc/src/int.rs b/compiler/rustc_codegen_gcc/src/int.rs index 841bcf592e4..e4c5eb91373 100644 --- a/compiler/rustc_codegen_gcc/src/int.rs +++ b/compiler/rustc_codegen_gcc/src/int.rs @@ -2,8 +2,6 @@ //! This module exists because some integer types are not supported on some gcc platforms, e.g. //! 128-bit integers on 32-bit platforms and thus require to be handled manually. -use std::convert::TryFrom; - use gccjit::{BinaryOp, ComparisonOp, FunctionType, Location, RValue, ToRValue, Type, UnaryOp}; use rustc_codegen_ssa::common::{IntPredicate, TypeKind}; use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, BuilderMethods, OverflowOp}; @@ -40,7 +38,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { self.cx.context.new_unary_op(self.location, operation, typ, a) } else { let element_type = typ.dyncast_array().expect("element type"); - self.from_low_high_rvalues( + self.concat_low_high_rvalues( typ, self.cx.context.new_unary_op( self.location, @@ -83,7 +81,19 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let b = self.context.new_cast(self.location, b, a_type); a >> b } else { - a >> b + let a_size = a_type.get_size(); + let b_size = b_type.get_size(); + match a_size.cmp(&b_size) { + std::cmp::Ordering::Less => { + let a = self.context.new_cast(self.location, a, b_type); + a >> b + } + std::cmp::Ordering::Equal => a >> b, + std::cmp::Ordering::Greater => { + let b = self.context.new_cast(self.location, b, a_type); + a >> b + } + } } } else if a_type.is_vector() && a_type.is_vector() { a >> b @@ -114,7 +124,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let shift_value = self.gcc_sub(b, sixty_four); let high = self.high(a); let sign = if a_type.is_signed(self) { high >> sixty_three } else { zero }; - let array_value = self.from_low_high_rvalues(a_type, high >> shift_value, sign); + let array_value = self.concat_low_high_rvalues(a_type, high >> shift_value, sign); then_block.add_assignment(self.location, result, array_value); then_block.end_with_jump(self.location, after_block); @@ -126,12 +136,15 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let shift_value = self.gcc_sub(sixty_four, b); // NOTE: cast low to its unsigned type in order to perform a logical right shift. 
- let unsigned_type = native_int_type.to_unsigned(&self.cx); + let unsigned_type = native_int_type.to_unsigned(self.cx); let casted_low = self.context.new_cast(self.location, self.low(a), unsigned_type); let shifted_low = casted_low >> self.context.new_cast(self.location, b, unsigned_type); let shifted_low = self.context.new_cast(self.location, shifted_low, native_int_type); - let array_value = - self.from_low_high_rvalues(a_type, (high << shift_value) | shifted_low, high >> b); + let array_value = self.concat_low_high_rvalues( + a_type, + (high << shift_value) | shifted_low, + high >> b, + ); actual_else_block.add_assignment(self.location, result, array_value); actual_else_block.end_with_jump(self.location, after_block); @@ -255,10 +268,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { ) -> (<Self as BackendTypes>::Value, <Self as BackendTypes>::Value) { use rustc_middle::ty::{Int, IntTy::*, Uint, UintTy::*}; - let new_kind = match typ.kind() { + let new_kind = match *typ.kind() { Int(t @ Isize) => Int(t.normalize(self.tcx.sess.target.pointer_width)), Uint(t @ Usize) => Uint(t.normalize(self.tcx.sess.target.pointer_width)), - t @ (Uint(_) | Int(_)) => t.clone(), + t @ (Uint(_) | Int(_)) => t, _ => panic!("tried to get overflow intrinsic for op applied to non-int type"), }; @@ -344,7 +357,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { } }; - let intrinsic = self.context.get_builtin_function(&name); + let intrinsic = self.context.get_builtin_function(name); let res = self .current_func() // TODO(antoyo): is it correct to use rhs type instead of the parameter typ? @@ -454,7 +467,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let native_int_type = a_type.dyncast_array().expect("get element type"); // NOTE: cast low to its unsigned type in order to perform a comparison correctly (e.g. // the sign is only on high). 
- let unsigned_type = native_int_type.to_unsigned(&self.cx); + let unsigned_type = native_int_type.to_unsigned(self.cx); let lhs_low = self.context.new_cast(self.location, self.low(lhs), unsigned_type); let rhs_low = self.context.new_cast(self.location, self.low(rhs), unsigned_type); @@ -589,7 +602,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { | IntPredicate::IntULT | IntPredicate::IntULE => { if !a_type.is_vector() { - let unsigned_type = a_type.to_unsigned(&self.cx); + let unsigned_type = a_type.to_unsigned(self.cx); lhs = self.context.new_cast(self.location, lhs, unsigned_type); rhs = self.context.new_cast(self.location, rhs, unsigned_type); } @@ -612,7 +625,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { { a ^ b } else { - self.from_low_high_rvalues( + self.concat_low_high_rvalues( a_type, self.low(a) ^ self.low(b), self.high(a) ^ self.high(b), @@ -635,7 +648,19 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let b = self.context.new_cast(self.location, b, a_type); a << b } else { - a << b + let a_size = a_type.get_size(); + let b_size = b_type.get_size(); + match a_size.cmp(&b_size) { + std::cmp::Ordering::Less => { + let a = self.context.new_cast(self.location, a, b_type); + a << b + } + std::cmp::Ordering::Equal => a << b, + std::cmp::Ordering::Greater => { + let b = self.context.new_cast(self.location, b, a_type); + a << b + } + } } } else if a_type.is_vector() && a_type.is_vector() { a << b @@ -661,7 +686,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { self.llbb().end_with_conditional(self.location, condition, then_block, else_block); let array_value = - self.from_low_high_rvalues(a_type, zero, self.low(a) << (b - sixty_four)); + self.concat_low_high_rvalues(a_type, zero, self.low(a) << (b - sixty_four)); then_block.add_assignment(self.location, result, array_value); then_block.end_with_jump(self.location, after_block); @@ -673,13 +698,13 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { // NOTE: cast low to its unsigned type in order to perform a logical right shift. // TODO(antoyo): adjust this ^ comment. - let unsigned_type = native_int_type.to_unsigned(&self.cx); + let unsigned_type = native_int_type.to_unsigned(self.cx); let casted_low = self.context.new_cast(self.location, self.low(a), unsigned_type); let shift_value = self.context.new_cast(self.location, sixty_four - b, unsigned_type); let high_low = self.context.new_cast(self.location, casted_low >> shift_value, native_int_type); - let array_value = self.from_low_high_rvalues( + let array_value = self.concat_low_high_rvalues( a_type, self.low(a) << b, (self.high(a) << b) | high_low, @@ -708,7 +733,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { // NOTE: we also need to swap the two elements here, in addition to swapping inside // the elements themselves like done above. - return self.from_low_high_rvalues(arg_type, swapped_msb, swapped_lsb); + return self.concat_low_high_rvalues(arg_type, swapped_msb, swapped_lsb); } // TODO(antoyo): check if it's faster to use string literals and a @@ -727,10 +752,10 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { pub fn gcc_int(&self, typ: Type<'gcc>, int: i64) -> RValue<'gcc> { if self.is_native_int_type_or_bool(typ) { - self.context.new_rvalue_from_long(typ, i64::try_from(int).expect("i64::try_from")) + self.context.new_rvalue_from_long(typ, int) } else { // NOTE: set the sign in high. 
- self.from_low_high(typ, int, -(int.is_negative() as i64)) + self.concat_low_high(typ, int, -(int.is_negative() as i64)) } } @@ -740,10 +765,9 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { let num = self.context.new_rvalue_from_long(self.u64_type, int as i64); self.gcc_int_cast(num, typ) } else if self.is_native_int_type_or_bool(typ) { - self.context - .new_rvalue_from_long(typ, u64::try_from(int).expect("u64::try_from") as i64) + self.context.new_rvalue_from_long(typ, int as i64) } else { - self.from_low_high(typ, int as i64, 0) + self.concat_low_high(typ, int as i64, 0) } } @@ -760,7 +784,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { let shift = high << sixty_four; shift | self.context.new_cast(None, low, typ) } else { - self.from_low_high(typ, low as i64, high as i64) + self.concat_low_high(typ, low as i64, high as i64) } } else if typ.is_i128(self) { // FIXME(antoyo): libgccjit cannot create 128-bit values yet. @@ -775,7 +799,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { if self.is_native_int_type_or_bool(typ) { self.context.new_rvalue_zero(typ) } else { - self.from_low_high(typ, 0, 0) + self.concat_low_high(typ, 0, 0) } } @@ -813,7 +837,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { "both types should either be native or non-native for or operation" ); let native_int_type = a_type.dyncast_array().expect("get element type"); - self.from_low_high_rvalues( + self.concat_low_high_rvalues( a_type, self.context.new_binary_op( loc, @@ -858,7 +882,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { let is_negative = self.context.new_comparison(None, ComparisonOp::LessThan, value, zero); let is_negative = self.gcc_int_cast(is_negative, dest_element_type); - self.from_low_high_rvalues( + self.concat_low_high_rvalues( dest_typ, self.context.new_cast(None, value, dest_element_type), self.context.new_unary_op(None, UnaryOp::Minus, dest_element_type, is_negative), @@ -926,7 +950,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { return self.context.new_cast(None, value, dest_typ); } - debug_assert!(value_type.dyncast_array().is_some()); + debug_assert!(dest_typ.dyncast_array().is_some()); let name_suffix = match self.type_kind(value_type) { TypeKind::Float => "sfti", TypeKind::Double => "dfti", @@ -978,7 +1002,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { .to_rvalue() } - fn from_low_high_rvalues( + fn concat_low_high_rvalues( &self, typ: Type<'gcc>, low: RValue<'gcc>, @@ -993,7 +1017,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { self.context.new_array_constructor(None, typ, &values) } - fn from_low_high(&self, typ: Type<'gcc>, low: i64, high: i64) -> RValue<'gcc> { + fn concat_low_high(&self, typ: Type<'gcc>, low: i64, high: i64) -> RValue<'gcc> { let (first, last) = match self.sess().target.options.endian { Endian::Little => (low, high), Endian::Big => (high, low), diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs index c4ae1751fa0..f7500933789 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/archs.rs @@ -74,6 +74,10 @@ match name { "llvm.amdgcn.cvt.sr.bf8.f32" => "__builtin_amdgcn_cvt_sr_bf8_f32", "llvm.amdgcn.cvt.sr.fp8.f32" => "__builtin_amdgcn_cvt_sr_fp8_f32", "llvm.amdgcn.dispatch.id" => "__builtin_amdgcn_dispatch_id", + "llvm.amdgcn.dot4.f32.bf8.bf8" => "__builtin_amdgcn_dot4_f32_bf8_bf8", + "llvm.amdgcn.dot4.f32.bf8.fp8" => "__builtin_amdgcn_dot4_f32_bf8_fp8", + "llvm.amdgcn.dot4.f32.fp8.bf8" => "__builtin_amdgcn_dot4_f32_fp8_bf8", + "llvm.amdgcn.dot4.f32.fp8.fp8" => 
"__builtin_amdgcn_dot4_f32_fp8_fp8", "llvm.amdgcn.ds.add.gs.reg.rtn" => "__builtin_amdgcn_ds_add_gs_reg_rtn", "llvm.amdgcn.ds.bpermute" => "__builtin_amdgcn_ds_bpermute", "llvm.amdgcn.ds.fadd.v2bf16" => "__builtin_amdgcn_ds_atomic_fadd_v2bf16", @@ -2291,6 +2295,10 @@ match name { "llvm.loongarch.csrxchg.d" => "__builtin_loongarch_csrxchg_d", "llvm.loongarch.csrxchg.w" => "__builtin_loongarch_csrxchg_w", "llvm.loongarch.dbar" => "__builtin_loongarch_dbar", + "llvm.loongarch.frecipe.d" => "__builtin_loongarch_frecipe_d", + "llvm.loongarch.frecipe.s" => "__builtin_loongarch_frecipe_s", + "llvm.loongarch.frsqrte.d" => "__builtin_loongarch_frsqrte_d", + "llvm.loongarch.frsqrte.s" => "__builtin_loongarch_frsqrte_s", "llvm.loongarch.ibar" => "__builtin_loongarch_ibar", "llvm.loongarch.iocsrrd.b" => "__builtin_loongarch_iocsrrd_b", "llvm.loongarch.iocsrrd.d" => "__builtin_loongarch_iocsrrd_d", @@ -2529,6 +2537,8 @@ match name { "llvm.loongarch.lasx.xvfnmsub.s" => "__builtin_lasx_xvfnmsub_s", "llvm.loongarch.lasx.xvfrecip.d" => "__builtin_lasx_xvfrecip_d", "llvm.loongarch.lasx.xvfrecip.s" => "__builtin_lasx_xvfrecip_s", + "llvm.loongarch.lasx.xvfrecipe.d" => "__builtin_lasx_xvfrecipe_d", + "llvm.loongarch.lasx.xvfrecipe.s" => "__builtin_lasx_xvfrecipe_s", "llvm.loongarch.lasx.xvfrint.d" => "__builtin_lasx_xvfrint_d", "llvm.loongarch.lasx.xvfrint.s" => "__builtin_lasx_xvfrint_s", "llvm.loongarch.lasx.xvfrintrm.d" => "__builtin_lasx_xvfrintrm_d", @@ -2541,6 +2551,8 @@ match name { "llvm.loongarch.lasx.xvfrintrz.s" => "__builtin_lasx_xvfrintrz_s", "llvm.loongarch.lasx.xvfrsqrt.d" => "__builtin_lasx_xvfrsqrt_d", "llvm.loongarch.lasx.xvfrsqrt.s" => "__builtin_lasx_xvfrsqrt_s", + "llvm.loongarch.lasx.xvfrsqrte.d" => "__builtin_lasx_xvfrsqrte_d", + "llvm.loongarch.lasx.xvfrsqrte.s" => "__builtin_lasx_xvfrsqrte_s", "llvm.loongarch.lasx.xvfrstp.b" => "__builtin_lasx_xvfrstp_b", "llvm.loongarch.lasx.xvfrstp.h" => "__builtin_lasx_xvfrstp_h", "llvm.loongarch.lasx.xvfrstpi.b" => "__builtin_lasx_xvfrstpi_b", @@ -3255,6 +3267,8 @@ match name { "llvm.loongarch.lsx.vfnmsub.s" => "__builtin_lsx_vfnmsub_s", "llvm.loongarch.lsx.vfrecip.d" => "__builtin_lsx_vfrecip_d", "llvm.loongarch.lsx.vfrecip.s" => "__builtin_lsx_vfrecip_s", + "llvm.loongarch.lsx.vfrecipe.d" => "__builtin_lsx_vfrecipe_d", + "llvm.loongarch.lsx.vfrecipe.s" => "__builtin_lsx_vfrecipe_s", "llvm.loongarch.lsx.vfrint.d" => "__builtin_lsx_vfrint_d", "llvm.loongarch.lsx.vfrint.s" => "__builtin_lsx_vfrint_s", "llvm.loongarch.lsx.vfrintrm.d" => "__builtin_lsx_vfrintrm_d", @@ -3267,6 +3281,8 @@ match name { "llvm.loongarch.lsx.vfrintrz.s" => "__builtin_lsx_vfrintrz_s", "llvm.loongarch.lsx.vfrsqrt.d" => "__builtin_lsx_vfrsqrt_d", "llvm.loongarch.lsx.vfrsqrt.s" => "__builtin_lsx_vfrsqrt_s", + "llvm.loongarch.lsx.vfrsqrte.d" => "__builtin_lsx_vfrsqrte_d", + "llvm.loongarch.lsx.vfrsqrte.s" => "__builtin_lsx_vfrsqrte_s", "llvm.loongarch.lsx.vfrstp.b" => "__builtin_lsx_vfrstp_b", "llvm.loongarch.lsx.vfrstp.h" => "__builtin_lsx_vfrstp_h", "llvm.loongarch.lsx.vfrstpi.b" => "__builtin_lsx_vfrstpi_b", @@ -4434,6 +4450,7 @@ match name { "llvm.nvvm.abs.bf16x2" => "__nvvm_abs_bf16x2", "llvm.nvvm.abs.i" => "__nvvm_abs_i", "llvm.nvvm.abs.ll" => "__nvvm_abs_ll", + "llvm.nvvm.activemask" => "__nvvm_activemask", "llvm.nvvm.add.rm.d" => "__nvvm_add_rm_d", "llvm.nvvm.add.rm.f" => "__nvvm_add_rm_f", "llvm.nvvm.add.rm.ftz.f" => "__nvvm_add_rm_ftz_f", @@ -4522,6 +4539,7 @@ match name { "llvm.nvvm.ex2.approx.d" => "__nvvm_ex2_approx_d", "llvm.nvvm.ex2.approx.f" => 
"__nvvm_ex2_approx_f", "llvm.nvvm.ex2.approx.ftz.f" => "__nvvm_ex2_approx_ftz_f", + "llvm.nvvm.exit" => "__nvvm_exit", "llvm.nvvm.f2bf16.rn" => "__nvvm_f2bf16_rn", "llvm.nvvm.f2bf16.rn.relu" => "__nvvm_f2bf16_rn_relu", "llvm.nvvm.f2bf16.rz" => "__nvvm_f2bf16_rz", @@ -4722,8 +4740,11 @@ match name { "llvm.nvvm.mul24.ui" => "__nvvm_mul24_ui", "llvm.nvvm.mulhi.i" => "__nvvm_mulhi_i", "llvm.nvvm.mulhi.ll" => "__nvvm_mulhi_ll", + "llvm.nvvm.mulhi.s" => "__nvvm_mulhi_s", "llvm.nvvm.mulhi.ui" => "__nvvm_mulhi_ui", "llvm.nvvm.mulhi.ull" => "__nvvm_mulhi_ull", + "llvm.nvvm.mulhi.us" => "__nvvm_mulhi_us", + "llvm.nvvm.nanosleep" => "__nvvm_nanosleep", "llvm.nvvm.neg.bf16" => "__nvvm_neg_bf16", "llvm.nvvm.neg.bf16x2" => "__nvvm_neg_bf16x2", "llvm.nvvm.popc.i" => "__nvvm_popc_i", @@ -4783,6 +4804,7 @@ match name { "llvm.nvvm.read.ptx.sreg.envreg7" => "__nvvm_read_ptx_sreg_envreg7", "llvm.nvvm.read.ptx.sreg.envreg8" => "__nvvm_read_ptx_sreg_envreg8", "llvm.nvvm.read.ptx.sreg.envreg9" => "__nvvm_read_ptx_sreg_envreg9", + "llvm.nvvm.read.ptx.sreg.globaltimer" => "__nvvm_read_ptx_sreg_globaltimer", "llvm.nvvm.read.ptx.sreg.gridid" => "__nvvm_read_ptx_sreg_gridid", // [DUPLICATE]: "llvm.nvvm.read.ptx.sreg.gridid" => "__nvvm_read_ptx_sreg_", "llvm.nvvm.read.ptx.sreg.laneid" => "__nvvm_read_ptx_sreg_laneid", @@ -4835,6 +4857,7 @@ match name { "llvm.nvvm.redux.sync.umax" => "__nvvm_redux_sync_umax", "llvm.nvvm.redux.sync.umin" => "__nvvm_redux_sync_umin", "llvm.nvvm.redux.sync.xor" => "__nvvm_redux_sync_xor", + "llvm.nvvm.reflect" => "__nvvm_reflect", "llvm.nvvm.rotate.b32" => "__nvvm_rotate_b32", "llvm.nvvm.rotate.b64" => "__nvvm_rotate_b64", "llvm.nvvm.rotate.right.b64" => "__nvvm_rotate_right_b64", @@ -4845,7 +4868,11 @@ match name { "llvm.nvvm.rsqrt.approx.f" => "__nvvm_rsqrt_approx_f", "llvm.nvvm.rsqrt.approx.ftz.f" => "__nvvm_rsqrt_approx_ftz_f", "llvm.nvvm.sad.i" => "__nvvm_sad_i", + "llvm.nvvm.sad.ll" => "__nvvm_sad_ll", + "llvm.nvvm.sad.s" => "__nvvm_sad_s", "llvm.nvvm.sad.ui" => "__nvvm_sad_ui", + "llvm.nvvm.sad.ull" => "__nvvm_sad_ull", + "llvm.nvvm.sad.us" => "__nvvm_sad_us", "llvm.nvvm.saturate.d" => "__nvvm_saturate_d", "llvm.nvvm.saturate.f" => "__nvvm_saturate_f", "llvm.nvvm.saturate.ftz.f" => "__nvvm_saturate_ftz_f", @@ -5471,6 +5498,7 @@ match name { "llvm.ppc.fctiwz" => "__builtin_ppc_fctiwz", "llvm.ppc.fctudz" => "__builtin_ppc_fctudz", "llvm.ppc.fctuwz" => "__builtin_ppc_fctuwz", + "llvm.ppc.fence" => "__builtin_ppc_fence", "llvm.ppc.fmaf128.round.to.odd" => "__builtin_fmaf128_round_to_odd", "llvm.ppc.fmsub" => "__builtin_ppc_fmsub", "llvm.ppc.fmsubs" => "__builtin_ppc_fmsubs", @@ -5599,6 +5627,9 @@ match name { "llvm.ppc.qpx.qvstfs" => "__builtin_qpx_qvstfs", "llvm.ppc.qpx.qvstfsa" => "__builtin_qpx_qvstfsa", "llvm.ppc.readflm" => "__builtin_readflm", + "llvm.ppc.rldimi" => "__builtin_ppc_rldimi", + "llvm.ppc.rlwimi" => "__builtin_ppc_rlwimi", + "llvm.ppc.rlwnm" => "__builtin_ppc_rlwnm", "llvm.ppc.scalar.extract.expq" => "__builtin_vsx_scalar_extract_expq", "llvm.ppc.scalar.insert.exp.qp" => "__builtin_vsx_scalar_insert_exp_qp", "llvm.ppc.set.texasr" => "__builtin_set_texasr", @@ -5912,6 +5943,8 @@ match name { "llvm.s390.vupllb" => "__builtin_s390_vupllb", "llvm.s390.vupllf" => "__builtin_s390_vupllf", "llvm.s390.vupllh" => "__builtin_s390_vupllh", + // spv + "llvm.spv.create.handle" => "__builtin_hlsl_create_handle", // ve "llvm.ve.vl.andm.MMM" => "__builtin_ve_vl_andm_MMM", "llvm.ve.vl.andm.mmm" => "__builtin_ve_vl_andm_mmm", diff --git 
a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs index ce8dee69a98..a1270482219 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/llvm.rs @@ -15,7 +15,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>( // Some LLVM intrinsics do not map 1-to-1 to GCC intrinsics, so we add the missing // arguments here. if gcc_func.get_param_count() != args.len() { - match &*func_name { + match func_name { // NOTE: the following intrinsics have a different number of parameters in LLVM and GCC. "__builtin_ia32_prold512_mask" | "__builtin_ia32_pmuldq512_mask" @@ -380,7 +380,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>( _ => (), } } else { - match &*func_name { + match func_name { "__builtin_ia32_rndscaless_mask_round" | "__builtin_ia32_rndscalesd_mask_round" => { let new_args = args.to_vec(); let arg3_type = gcc_func.get_param_type(2); @@ -629,17 +629,22 @@ pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function #[cfg(feature = "master")] pub fn intrinsic<'gcc, 'tcx>(name: &str, cx: &CodegenCx<'gcc, 'tcx>) -> Function<'gcc> { - match name { + let gcc_name = match name { "llvm.prefetch" => { let gcc_name = "__builtin_prefetch"; let func = cx.context.get_builtin_function(gcc_name); cx.functions.borrow_mut().insert(gcc_name.to_string(), func); return func; } - _ => (), - } - let gcc_name = match name { + "llvm.aarch64.isb" => { + // FIXME: GCC doesn't support __builtin_arm_isb yet, check if this builtin is OK. + let gcc_name = "__atomic_thread_fence"; + let func = cx.context.get_builtin_function(gcc_name); + cx.functions.borrow_mut().insert(gcc_name.to_string(), func); + return func; + } + "llvm.x86.xgetbv" => "__builtin_ia32_xgetbv", // NOTE: this doc specifies the equivalent GCC builtins: http://huonw.github.io/llvmint/llvmint/x86/index.html "llvm.sqrt.v2f64" => "__builtin_ia32_sqrtpd", diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs index 43f12b514af..9352a67e362 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/mod.rs @@ -91,7 +91,7 @@ fn get_simple_intrinsic<'gcc, 'tcx>( sym::abort => "abort", _ => return None, }; - Some(cx.context.get_builtin_function(&gcc_name)) + Some(cx.context.get_builtin_function(gcc_name)) } impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { @@ -122,10 +122,17 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout); let simple = get_simple_intrinsic(self, name); + + // FIXME(tempdragon): Re-enable `clippy::suspicious_else_formatting` if the following issue is solved: + // https://github.com/rust-lang/rust-clippy/issues/12497 + // and leave `else if use_integer_compare` to be placed "as is". + #[allow(clippy::suspicious_else_formatting)] let llval = match name { _ if simple.is_some() => { // FIXME(antoyo): remove this cast when the API supports function. 
- let func = unsafe { std::mem::transmute(simple.expect("simple")) }; + let func = unsafe { + std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(simple.expect("simple")) + }; self.call( self.type_void(), None, @@ -167,7 +174,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { sym::volatile_load | sym::unaligned_volatile_load => { let tp_ty = fn_args.type_at(0); let ptr = args[0].immediate(); - let load = if let PassMode::Cast { cast: ty, pad_i32: _ } = &fn_abi.ret.mode { + let load = if let PassMode::Cast { cast: ref ty, pad_i32: _ } = fn_abi.ret.mode { let gcc_ty = ty.gcc_type(self); self.volatile_load(gcc_ty, ptr) } else { @@ -213,12 +220,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { let after_block = func.new_block("after"); let arg = args[0].immediate(); - let result = func.new_local(None, arg.get_type(), "zeros"); + let result = func.new_local(None, self.u32_type, "zeros"); let zero = self.cx.gcc_zero(arg.get_type()); let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero); self.llbb().end_with_conditional(None, cond, then_block, else_block); - let zero_result = self.cx.gcc_uint(arg.get_type(), width); + let zero_result = self.cx.gcc_uint(self.u32_type, width); then_block.add_assignment(None, result, zero_result); then_block.end_with_jump(None, after_block); @@ -386,7 +393,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { }; if !fn_abi.ret.is_ignore() { - if let PassMode::Cast { cast: ty, .. } = &fn_abi.ret.mode { + if let PassMode::Cast { cast: ref ty, .. } = fn_abi.ret.mode { let ptr_llty = self.type_ptr_to(ty.gcc_type(self)); let ptr = self.pointercast(result.val.llval, ptr_llty); self.store(llval, ptr, result.val.align); @@ -592,7 +599,7 @@ fn int_type_width_signed<'gcc, 'tcx>( ty: Ty<'tcx>, cx: &CodegenCx<'gcc, 'tcx>, ) -> Option<(u64, bool)> { - match ty.kind() { + match *ty.kind() { ty::Int(t) => Some(( match t { rustc_middle::ty::IntTy::Isize => u64::from(cx.tcx.sess.target.pointer_width), @@ -698,16 +705,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { // TODO(antoyo): use width? let arg_type = arg.get_type(); + let result_type = self.u32_type; let count_leading_zeroes = // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here // instead of using is_uint(). 
- if arg_type.is_uint(&self.cx) { + if arg_type.is_uint(self.cx) { "__builtin_clz" } - else if arg_type.is_ulong(&self.cx) { + else if arg_type.is_ulong(self.cx) { "__builtin_clzl" } - else if arg_type.is_ulonglong(&self.cx) { + else if arg_type.is_ulonglong(self.cx) { "__builtin_clzll" } else if width == 128 { @@ -755,7 +763,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let res = self.context.new_array_access(self.location, result, index); - return self.gcc_int_cast(res.to_rvalue(), arg_type); + return self.gcc_int_cast(res.to_rvalue(), result_type); } else { let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll"); @@ -763,17 +771,18 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64; let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8); let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff; - return self.context.new_cast(self.location, res, arg_type); + return self.context.new_cast(self.location, res, result_type); }; let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes); let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]); - self.context.new_cast(self.location, res, arg_type) + self.context.new_cast(self.location, res, result_type) } fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { - let result_type = arg.get_type(); - let arg = if result_type.is_signed(self.cx) { - let new_type = result_type.to_unsigned(self.cx); + let arg_type = arg.get_type(); + let result_type = self.u32_type; + let arg = if arg_type.is_signed(self.cx) { + let new_type = arg_type.to_unsigned(self.cx); self.gcc_int_cast(arg, new_type) } else { arg @@ -782,17 +791,17 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let (count_trailing_zeroes, expected_type) = // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here // instead of using is_uint(). - if arg_type.is_uchar(&self.cx) || arg_type.is_ushort(&self.cx) || arg_type.is_uint(&self.cx) { + if arg_type.is_uchar(self.cx) || arg_type.is_ushort(self.cx) || arg_type.is_uint(self.cx) { // NOTE: we don't need to & 0xFF for uchar because the result is undefined on zero. ("__builtin_ctz", self.cx.uint_type) } - else if arg_type.is_ulong(&self.cx) { + else if arg_type.is_ulong(self.cx) { ("__builtin_ctzl", self.cx.ulong_type) } - else if arg_type.is_ulonglong(&self.cx) { + else if arg_type.is_ulonglong(self.cx) { ("__builtin_ctzll", self.cx.ulonglong_type) } - else if arg_type.is_u128(&self.cx) { + else if arg_type.is_u128(self.cx) { // Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119 let array_type = self.context.new_array_type(None, arg_type, 3); let result = self.current_func() @@ -863,18 +872,16 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { fn pop_count(&mut self, value: RValue<'gcc>) -> RValue<'gcc> { // TODO(antoyo): use the optimized version with fewer operations. 
- let result_type = value.get_type(); - let value_type = result_type.to_unsigned(self.cx); + let result_type = self.u32_type; + let arg_type = value.get_type(); + let value_type = arg_type.to_unsigned(self.cx); - let value = if result_type.is_signed(self.cx) { - self.gcc_int_cast(value, value_type) - } else { - value - }; + let value = + if arg_type.is_signed(self.cx) { self.gcc_int_cast(value, value_type) } else { value }; // only break apart 128-bit ints if they're not natively supported // TODO(antoyo): remove this if/when native 128-bit integers land in libgccjit - if value_type.is_u128(&self.cx) && !self.cx.supports_128bit_integers { + if value_type.is_u128(self.cx) && !self.cx.supports_128bit_integers { let sixty_four = self.gcc_int(value_type, 64); let right_shift = self.gcc_lshr(value, sixty_four); let high = self.gcc_int_cast(right_shift, self.cx.ulonglong_type); @@ -997,7 +1004,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { // Return `result_type`'s maximum or minimum value on overflow // NOTE: convert the type to unsigned to have an unsigned shift. - let unsigned_type = result_type.to_unsigned(&self.cx); + let unsigned_type = result_type.to_unsigned(self.cx); let shifted = self.gcc_lshr( self.gcc_int_cast(lhs, unsigned_type), self.gcc_int(unsigned_type, width as i64 - 1), @@ -1189,7 +1196,7 @@ fn codegen_gnu_try<'gcc>( bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None, None); }); - let func = unsafe { std::mem::transmute(func) }; + let func = unsafe { std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(func) }; // Note that no invoke is used here because by definition this function // can't panic (that's what it's catching). @@ -1263,7 +1270,7 @@ fn gen_fn<'a, 'gcc, 'tcx>( // FIXME(eddyb) find a nicer way to do this. 
cx.linkage.set(FunctionType::Internal); let func = cx.declare_fn(name, fn_abi); - let func_val = unsafe { std::mem::transmute(func) }; + let func_val = unsafe { std::mem::transmute::<Function<'gcc>, RValue<'gcc>>(func) }; cx.set_frame_pointer_type(func_val); cx.apply_target_cpu_attr(func_val); let block = Builder::append_block(cx, func_val, "entry-block"); diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs index 1625e6ecdb7..ba214a9c24c 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs @@ -13,6 +13,7 @@ use rustc_codegen_ssa::errors::InvalidMonomorphization; use rustc_codegen_ssa::mir::operand::OperandRef; use rustc_codegen_ssa::mir::place::PlaceRef; use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods}; +#[cfg(feature = "master")] use rustc_hir as hir; use rustc_middle::mir::BinOp; use rustc_middle::span_bug; @@ -72,11 +73,11 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let expected_bytes = len / 8 + ((len % 8 > 0) as u64); let mask_ty = arg_tys[0]; - let mut mask = match mask_ty.kind() { + let mut mask = match *mask_ty.kind() { ty::Int(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(), ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => args[0].immediate(), ty::Array(elem, len) - if matches!(elem.kind(), ty::Uint(ty::UintTy::U8)) + if matches!(*elem.kind(), ty::Uint(ty::UintTy::U8)) && len.try_eval_target_usize(bx.tcx, ty::ParamEnv::reveal_all()) == Some(expected_bytes) => { @@ -309,10 +310,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( }) .collect(); return Ok(bx.context.new_rvalue_from_vector(None, v_type, &elems)); - } else { - // avoid the unnecessary truncation as an optimization. - return Ok(bx.context.new_bitcast(None, result, v_type)); } + // avoid the unnecessary truncation as an optimization. + return Ok(bx.context.new_bitcast(None, result, v_type)); } // since gcc doesn't have vector shuffle methods available in non-patched builds, fallback to // component-wise bitreverses if they're not available. @@ -342,11 +342,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( .map(|i| { let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64); let value = bx.extract_element(vector, index).to_rvalue(); - if name == sym::simd_ctlz { - bx.count_leading_zeroes(value.get_type().get_size() as u64 * 8, value) + let value_type = value.get_type(); + let element = if name == sym::simd_ctlz { + bx.count_leading_zeroes(value_type.get_size() as u64 * 8, value) } else { - bx.count_trailing_zeroes(value.get_type().get_size() as u64 * 8, value) - } + bx.count_trailing_zeroes(value_type.get_size() as u64 * 8, value) + }; + bx.context.new_cast(None, element, value_type) }) .collect(); return Ok(bx.context.new_rvalue_from_vector(None, vector.get_type(), &elements)); @@ -355,8 +357,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( if name == sym::simd_shuffle { // Make sure this is actually an array, since typeck only checks the length-suffixed // version of this intrinsic. 
- let n: u64 = match args[2].layout.ty.kind() { - ty::Array(ty, len) if matches!(ty.kind(), ty::Uint(ty::UintTy::U32)) => { + let n: u64 = match *args[2].layout.ty.kind() { + ty::Array(ty, len) if matches!(*ty.kind(), ty::Uint(ty::UintTy::U32)) => { len.try_eval_target_usize(bx.cx.tcx, ty::ParamEnv::reveal_all()).unwrap_or_else( || span_bug!(span, "could not evaluate shuffle index array length"), ) @@ -429,13 +431,148 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( m_len == v_len, InvalidMonomorphization::MismatchedLengths { span, name, m_len, v_len } ); - match m_elem_ty.kind() { + match *m_elem_ty.kind() { ty::Int(_) => {} _ => return_error!(InvalidMonomorphization::MaskType { span, name, ty: m_elem_ty }), } return Ok(bx.vector_select(args[0].immediate(), args[1].immediate(), args[2].immediate())); } + if name == sym::simd_cast_ptr { + require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty }); + let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx()); + + require!( + in_len == out_len, + InvalidMonomorphization::ReturnLengthInputType { + span, + name, + in_len, + in_ty, + ret_ty, + out_len + } + ); + + match *in_elem.kind() { + ty::RawPtr(p_ty, _) => { + let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| { + bx.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), ty) + }); + require!( + metadata.is_unit(), + InvalidMonomorphization::CastFatPointer { span, name, ty: in_elem } + ); + } + _ => { + return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem }) + } + } + match *out_elem.kind() { + ty::RawPtr(p_ty, _) => { + let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| { + bx.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), ty) + }); + require!( + metadata.is_unit(), + InvalidMonomorphization::CastFatPointer { span, name, ty: out_elem } + ); + } + _ => { + return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem }) + } + } + + let arg = args[0].immediate(); + let elem_type = llret_ty.dyncast_vector().expect("vector return type").get_element_type(); + let values: Vec<_> = (0..in_len) + .map(|i| { + let idx = bx.gcc_int(bx.usize_type, i as _); + let value = bx.extract_element(arg, idx); + bx.pointercast(value, elem_type) + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values)); + } + + if name == sym::simd_expose_provenance { + require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty }); + let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx()); + + require!( + in_len == out_len, + InvalidMonomorphization::ReturnLengthInputType { + span, + name, + in_len, + in_ty, + ret_ty, + out_len + } + ); + + match *in_elem.kind() { + ty::RawPtr(_, _) => {} + _ => { + return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem }) + } + } + match *out_elem.kind() { + ty::Uint(ty::UintTy::Usize) => {} + _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: out_elem }), + } + + let arg = args[0].immediate(); + let elem_type = llret_ty.dyncast_vector().expect("vector return type").get_element_type(); + let values: Vec<_> = (0..in_len) + .map(|i| { + let idx = bx.gcc_int(bx.usize_type, i as _); + let value = bx.extract_element(arg, idx); + bx.ptrtoint(value, elem_type) + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values)); + } + + if name == sym::simd_with_exposed_provenance { + require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty 
}); + let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx()); + + require!( + in_len == out_len, + InvalidMonomorphization::ReturnLengthInputType { + span, + name, + in_len, + in_ty, + ret_ty, + out_len + } + ); + + match *in_elem.kind() { + ty::Uint(ty::UintTy::Usize) => {} + _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: in_elem }), + } + match *out_elem.kind() { + ty::RawPtr(_, _) => {} + _ => { + return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem }) + } + } + + let arg = args[0].immediate(); + let elem_type = llret_ty.dyncast_vector().expect("vector return type").get_element_type(); + let values: Vec<_> = (0..in_len) + .map(|i| { + let idx = bx.gcc_int(bx.usize_type, i as _); + let value = bx.extract_element(arg, idx); + bx.inttoptr(value, elem_type) + }) + .collect(); + return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values)); + } + #[cfg(feature = "master")] if name == sym::simd_cast || name == sym::simd_as { require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty }); @@ -462,13 +599,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( Unsupported, } - let in_style = match in_elem.kind() { + let in_style = match *in_elem.kind() { ty::Int(_) | ty::Uint(_) => Style::Int, ty::Float(_) => Style::Float, _ => Style::Unsupported, }; - let out_style = match out_elem.kind() { + let out_style = match *out_elem.kind() { ty::Int(_) | ty::Uint(_) => Style::Int, ty::Float(_) => Style::Float, _ => Style::Unsupported, @@ -495,7 +632,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( macro_rules! arith_binary { ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => { $(if name == sym::$name { - match in_elem.kind() { + match *in_elem.kind() { $($(ty::$p(_))|* => { return Ok(bx.$call(args[0].immediate(), args[1].immediate())) })* @@ -533,7 +670,6 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let sign_shift = bx.context.new_rvalue_from_int(elem_type, elem_size as i32 - 1); let one = bx.context.new_rvalue_one(elem_type); - let mut shift = 0; for i in 0..in_len { let elem = bx.extract_element(vector, bx.context.new_rvalue_from_int(bx.int_type, i as i32)); @@ -541,17 +677,16 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let masked = shifted & one; result = result | (bx.context.new_cast(None, masked, result_type) - << bx.context.new_rvalue_from_int(result_type, shift)); - shift += 1; + << bx.context.new_rvalue_from_int(result_type, i as i32)); } - match ret_ty.kind() { + match *ret_ty.kind() { ty::Uint(i) if i.bit_width() == Some(expected_int_bits) => { // Zero-extend iN to the bitmask type: return Ok(result); } ty::Array(elem, len) - if matches!(elem.kind(), ty::Uint(ty::UintTy::U8)) + if matches!(*elem.kind(), ty::Uint(ty::UintTy::U8)) && len.try_eval_target_usize(bx.tcx, ty::ParamEnv::reveal_all()) == Some(expected_bytes) => { @@ -590,7 +725,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( return Err(()); }}; } - let (elem_ty_str, elem_ty) = if let ty::Float(f) = in_elem.kind() { + let (elem_ty_str, elem_ty) = if let ty::Float(ref f) = *in_elem.kind() { let elem_ty = bx.cx.type_float_from_ty(*f); match f.bit_width() { 32 => ("f", elem_ty), @@ -816,7 +951,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let (_, element_ty0) = arg_tys[0].simd_size_and_type(bx.tcx()); let (_, element_ty1) = arg_tys[1].simd_size_and_type(bx.tcx()); let (pointer_count, underlying_ty) = match *element_ty1.kind() { - ty::RawPtr(p_ty, _) if p_ty == in_elem => (ptr_count(element_ty1), 
non_ptr(element_ty1)), + ty::RawPtr(p_ty, _) if p_ty == in_elem => { + (ptr_count(element_ty1), non_ptr(element_ty1)) + } _ => { require!( false, @@ -839,7 +976,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( // The element type of the third argument must be a signed integer type of any width: let (_, element_ty2) = arg_tys[2].simd_size_and_type(bx.tcx()); - match element_ty2.kind() { + match *element_ty2.kind() { ty::Int(_) => (), _ => { require!( @@ -955,7 +1092,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( assert_eq!(underlying_ty, non_ptr(element_ty0)); // The element type of the third argument must be a signed integer type of any width: - match element_ty2.kind() { + match *element_ty2.kind() { ty::Int(_) => (), _ => { require!( @@ -1013,7 +1150,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( macro_rules! arith_unary { ($($name: ident: $($($p: ident),* => $call: ident),*;)*) => { $(if name == sym::$name { - match in_elem.kind() { + match *in_elem.kind() { $($(ty::$p(_))|* => { return Ok(bx.$call(args[0].immediate())) })* @@ -1137,7 +1274,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( ret_ty == in_elem, InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty } ); - return match in_elem.kind() { + return match *in_elem.kind() { ty::Int(_) | ty::Uint(_) => { let r = bx.vector_reduce_op(args[0].immediate(), $vec_op); if $ordered { @@ -1206,7 +1343,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( ret_ty == in_elem, InvalidMonomorphization::ReturnType { span, name, in_elem, in_ty, ret_ty } ); - return match in_elem.kind() { + return match *in_elem.kind() { ty::Int(_) | ty::Uint(_) => Ok(bx.$int_red(args[0].immediate())), ty::Float(_) => Ok(bx.$float_red(args[0].immediate())), _ => return_error!(InvalidMonomorphization::UnsupportedSymbol { @@ -1235,7 +1372,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( ); args[0].immediate() } else { - match in_elem.kind() { + match *in_elem.kind() { ty::Int(_) | ty::Uint(_) => {} _ => return_error!(InvalidMonomorphization::UnsupportedSymbol { span, @@ -1249,7 +1386,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( args[0].immediate() }; - return match in_elem.kind() { + return match *in_elem.kind() { ty::Int(_) | ty::Uint(_) => { let r = bx.vector_reduce_op(input, $op); Ok(if !$boolean { diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs index 24856506c46..1132b0cd2f5 100644 --- a/compiler/rustc_codegen_gcc/src/lib.rs +++ b/compiler/rustc_codegen_gcc/src/lib.rs @@ -4,7 +4,7 @@ * TODO(antoyo): support LTO (gcc's equivalent to Full LTO is -flto -flto-partition=one — https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html). * For Thin LTO, this might be helpful: * In gcc 4.6 -fwhopr was removed and became default with -flto. The non-whopr path can still be executed via -flto-partition=none. - * Or the new incremental LTO? + * Or the new incremental LTO (https://www.phoronix.com/news/GCC-Incremental-LTO-Patches)? * * Maybe some missing optizations enabled by rustc's LTO is in there: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html * Like -fipa-icf (should be already enabled) and maybe -fdevirtualize-at-ltrans. 
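For context on the `from_low_high` to `concat_low_high` renames in `int.rs` earlier in this patch: on targets without native 128-bit integers the backend stores such values as two 64-bit halves, ordered by endianness, and, per the "set the sign in high" note, sign-extends a small constant by filling the high half with -1 when the value is negative. A self-contained sketch of that representation, with plain Rust arrays standing in for gccjit array constructors:

```rust
// Sketch only: the [low, high] layout used to emulate 128-bit integers.
fn concat_low_high(low: i64, high: i64, little_endian: bool) -> [i64; 2] {
    // Little-endian targets put the low half first; big-endian targets swap the order.
    if little_endian { [low, high] } else { [high, low] }
}

fn gcc_int_like(int: i64, little_endian: bool) -> [i64; 2] {
    // "Set the sign in high": negative values fill the high half with -1 (all ones),
    // non-negative values fill it with 0, matching the diff's
    // `concat_low_high(typ, int, -(int.is_negative() as i64))`.
    concat_low_high(int, -(int.is_negative() as i64), little_endian)
}

fn main() {
    assert_eq!(gcc_int_like(5, true), [5, 0]);
    assert_eq!(gcc_int_like(-5, true), [-5, -1]);
    assert_eq!(gcc_int_like(-5, false), [-1, -5]); // big-endian: high half first
}
```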
@@ -16,12 +16,13 @@
 #![allow(internal_features)]
 #![doc(rust_logo)]
 #![feature(rustdoc_internals)]
-#![feature(rustc_private, decl_macro, never_type, trusted_len, hash_raw_entry)]
+#![feature(rustc_private, decl_macro, never_type, trusted_len, hash_raw_entry, let_chains)]
 #![allow(broken_intra_doc_links)]
 #![recursion_limit = "256"]
 #![warn(rust_2018_idioms)]
 #![warn(unused_lifetimes)]
 #![deny(clippy::pattern_type_mismatch)]
+#![allow(clippy::needless_lifetimes)]

 extern crate rustc_apfloat;
 extern crate rustc_ast;
@@ -73,6 +74,7 @@ mod type_of;

 use std::any::Any;
 use std::fmt::Debug;
+use std::ops::Deref;
 #[cfg(not(feature = "master"))]
 use std::sync::atomic::AtomicBool;
 #[cfg(not(feature = "master"))]
@@ -80,8 +82,9 @@ use std::sync::atomic::Ordering;
 use std::sync::Arc;
 use std::sync::Mutex;

+use back::lto::ThinBuffer;
+use back::lto::ThinData;
 use errors::LTONotSupported;
-#[cfg(not(feature = "master"))]
 use gccjit::CType;
 use gccjit::{Context, OptimizationLevel};
 #[cfg(feature = "master")]
@@ -92,9 +95,7 @@ use rustc_codegen_ssa::back::write::{
     CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn,
 };
 use rustc_codegen_ssa::base::codegen_crate;
-use rustc_codegen_ssa::traits::{
-    CodegenBackend, ExtraBackendMethods, ThinBufferMethods, WriteBackendMethods,
-};
+use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, WriteBackendMethods};
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
 use rustc_data_structures::fx::FxIndexMap;
 use rustc_data_structures::sync::IntoDynSyncSend;
@@ -139,6 +140,10 @@ impl TargetInfo {
     fn supports_128bit_int(&self) -> bool {
         self.supports_128bit_integers.load(Ordering::SeqCst)
     }
+
+    fn supports_target_dependent_type(&self, _typ: CType) -> bool {
+        false
+    }
 }

 #[derive(Clone)]
@@ -160,6 +165,10 @@ impl LockedTargetInfo {
     fn supports_128bit_int(&self) -> bool {
         self.info.lock().expect("lock").supports_128bit_int()
     }
+
+    fn supports_target_dependent_type(&self, typ: CType) -> bool {
+        self.info.lock().expect("lock").supports_target_dependent_type(typ)
+    }
 }

 #[derive(Clone)]
@@ -188,6 +197,7 @@ impl CodegenBackend for GccCodegenBackend {

         #[cfg(feature = "master")]
         gccjit::set_global_personality_function_name(b"rust_eh_personality\0");
+
         if sess.lto() == Lto::Thin {
             sess.dcx().emit_warn(LTONotSupported {});
         }
@@ -293,7 +303,7 @@ impl ExtraBackendMethods for GccCodegenBackend {
         alloc_error_handler_kind: AllocatorKind,
     ) -> Self::Module {
         let mut mods = GccContext {
-            context: new_context(tcx),
+            context: Arc::new(SyncContext::new(new_context(tcx))),
             should_combine_object_files: false,
             temp_dir: None,
         };
@@ -323,35 +333,42 @@ impl ExtraBackendMethods for GccCodegenBackend {
     }
 }

-pub struct ThinBuffer;
+pub struct GccContext {
+    context: Arc<SyncContext>,
+    should_combine_object_files: bool,
+    // Temporary directory used by LTO. We keep it here so that it's not removed before linking.
+    temp_dir: Option<TempDir>,
+}

-impl ThinBufferMethods for ThinBuffer {
-    fn data(&self) -> &[u8] {
-        unimplemented!();
-    }
+struct SyncContext {
+    context: Context<'static>,
+}

-    fn thin_link_data(&self) -> &[u8] {
-        unimplemented!();
+impl SyncContext {
+    fn new(context: Context<'static>) -> Self {
+        Self { context }
+    }
 }

-pub struct GccContext {
-    context: Context<'static>,
-    should_combine_object_files: bool,
-    // Temporary directory used by LTO. We keep it here so that it's not removed before linking.
-    temp_dir: Option<TempDir>,
+impl Deref for SyncContext {
+    type Target = Context<'static>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.context
+    }
 }

-unsafe impl Send for GccContext {}
-// FIXME(antoyo): that shouldn't be Sync. Parallel compilation is currently disabled with "-Zno-parallel-llvm". Try to disable it here.
-unsafe impl Sync for GccContext {}
+unsafe impl Send for SyncContext {}
+// FIXME(antoyo): that shouldn't be Sync. Parallel compilation is currently disabled with "-Zno-parallel-llvm".
+// TODO: disable it here by returing false in CodegenBackend::supports_parallel().
+unsafe impl Sync for SyncContext {}

 impl WriteBackendMethods for GccCodegenBackend {
     type Module = GccContext;
     type TargetMachine = ();
     type TargetMachineError = ();
     type ModuleBuffer = ModuleBuffer;
-    type ThinData = ();
+    type ThinData = ThinData;
     type ThinBuffer = ThinBuffer;

     fn run_fat_lto(
@@ -363,11 +380,11 @@ impl WriteBackendMethods for GccCodegenBackend {
     }

     fn run_thin_lto(
-        _cgcx: &CodegenContext<Self>,
-        _modules: Vec<(String, Self::ThinBuffer)>,
-        _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
+        cgcx: &CodegenContext<Self>,
+        modules: Vec<(String, Self::ThinBuffer)>,
+        cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
     ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
-        unimplemented!();
+        back::lto::run_thin(cgcx, modules, cached_modules)
     }

     fn print_pass_timings(&self) {
@@ -397,10 +414,10 @@ impl WriteBackendMethods for GccCodegenBackend {
     }

     unsafe fn optimize_thin(
-        _cgcx: &CodegenContext<Self>,
-        _thin: ThinModule<Self>,
+        cgcx: &CodegenContext<Self>,
+        thin: ThinModule<Self>,
     ) -> Result<ModuleCodegen<Self::Module>, FatalError> {
-        unimplemented!();
+        back::lto::optimize_thin_module(thin, cgcx)
     }

     unsafe fn codegen(
@@ -413,10 +430,10 @@ impl WriteBackendMethods for GccCodegenBackend {
     }

     fn prepare_thin(
-        _module: ModuleCodegen<Self::Module>,
-        _emit_summary: bool,
+        module: ModuleCodegen<Self::Module>,
+        emit_summary: bool,
     ) -> (String, Self::ThinBuffer) {
-        unimplemented!();
+        back::lto::prepare_thin(module, emit_summary)
     }

     fn serialize_module(_module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
@@ -437,7 +454,8 @@ impl WriteBackendMethods for GccCodegenBackend {
 pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> {
     #[cfg(feature = "master")]
     let info = {
-        // Check whether the target supports 128-bit integers.
+        // Check whether the target supports 128-bit integers, and sized floating point types (like
+        // Float16).
         let context = Context::default();
         Arc::new(Mutex::new(IntoDynSyncSend(context.get_target_info())))
     };
@@ -467,6 +485,7 @@ pub fn target_features(
     allow_unstable: bool,
     target_info: &LockedTargetInfo,
 ) -> Vec<Symbol> {
+    // TODO(antoyo): use global_gcc_features.
     sess.target
         .supported_target_features()
         .iter()
@@ -477,8 +496,12 @@ pub fn target_features(
                 None
             }
         })
-        .filter(|_feature| {
-            target_info.cpu_supports(_feature)
+        .filter(|feature| {
+            // TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
+            if *feature == "neon" {
+                return false;
+            }
+            target_info.cpu_supports(feature)
             /* adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma, avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
diff --git a/compiler/rustc_codegen_gcc/src/mono_item.rs b/compiler/rustc_codegen_gcc/src/mono_item.rs
index 359d3c70b4c..44657ad4f6e 100644
--- a/compiler/rustc_codegen_gcc/src/mono_item.rs
+++ b/compiler/rustc_codegen_gcc/src/mono_item.rs
@@ -81,6 +81,6 @@ impl<'gcc, 'tcx> PreDefineMethods<'tcx> for CodegenCx<'gcc, 'tcx> {

         // TODO(antoyo): use inline attribute from there in linkage.set() above.
         self.functions.borrow_mut().insert(symbol_name.to_string(), decl);
-        self.function_instances.borrow_mut().insert(instance, unsafe { std::mem::transmute(decl) });
+        self.function_instances.borrow_mut().insert(instance, decl);
     }
 }
diff --git a/compiler/rustc_codegen_gcc/src/type_.rs b/compiler/rustc_codegen_gcc/src/type_.rs
index 4caff2e6310..e20234c5ce7 100644
--- a/compiler/rustc_codegen_gcc/src/type_.rs
+++ b/compiler/rustc_codegen_gcc/src/type_.rs
@@ -1,3 +1,8 @@
+#[cfg(feature = "master")]
+use std::convert::TryInto;
+
+#[cfg(feature = "master")]
+use gccjit::CType;
 use gccjit::{RValue, Struct, Type};
 use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::traits::{BaseTypeMethods, DerivedTypeMethods, TypeMembershipMethods};
@@ -142,25 +147,76 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
     }

     fn type_f16(&self) -> Type<'gcc> {
-        unimplemented!("f16_f128")
+        #[cfg(feature = "master")]
+        if self.supports_f16_type {
+            return self.context.new_c_type(CType::Float16);
+        }
+        bug!("unsupported float width 16")
     }

     fn type_f32(&self) -> Type<'gcc> {
+        #[cfg(feature = "master")]
+        if self.supports_f32_type {
+            return self.context.new_c_type(CType::Float32);
+        }
         self.float_type
     }

     fn type_f64(&self) -> Type<'gcc> {
+        #[cfg(feature = "master")]
+        if self.supports_f64_type {
+            return self.context.new_c_type(CType::Float64);
+        }
         self.double_type
     }

     fn type_f128(&self) -> Type<'gcc> {
-        unimplemented!("f16_f128")
+        #[cfg(feature = "master")]
+        if self.supports_f128_type {
+            return self.context.new_c_type(CType::Float128);
+        }
+        bug!("unsupported float width 128")
     }

     fn type_func(&self, params: &[Type<'gcc>], return_type: Type<'gcc>) -> Type<'gcc> {
         self.context.new_function_pointer_type(None, return_type, params, false)
     }

+    #[cfg(feature = "master")]
+    fn type_kind(&self, typ: Type<'gcc>) -> TypeKind {
+        if self.is_int_type_or_bool(typ) {
+            TypeKind::Integer
+        } else if typ.get_pointee().is_some() {
+            TypeKind::Pointer
+        } else if typ.is_vector() {
+            TypeKind::Vector
+        } else if typ.dyncast_array().is_some() {
+            TypeKind::Array
+        } else if typ.is_struct().is_some() {
+            TypeKind::Struct
+        } else if typ.dyncast_function_ptr_type().is_some() {
+            TypeKind::Function
+        } else if typ.is_compatible_with(self.float_type) {
+            TypeKind::Float
+        } else if typ.is_compatible_with(self.double_type) {
+            TypeKind::Double
+        } else if typ.is_floating_point() {
+            match typ.get_size() {
+                2 => TypeKind::Half,
+                4 => TypeKind::Float,
+                8 => TypeKind::Double,
+                16 => TypeKind::FP128,
+                size => unreachable!("Floating-point type of size {}", size),
+            }
+        } else if typ == self.type_void() {
+            TypeKind::Void
+        } else {
+            // TODO(antoyo): support other types.
+            unimplemented!();
+        }
+    }
+
+    #[cfg(not(feature = "master"))]
     fn type_kind(&self, typ: Type<'gcc>) -> TypeKind {
         if self.is_int_type_or_bool(typ) {
             TypeKind::Integer
@@ -170,9 +226,19 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
             TypeKind::Double
         } else if typ.is_vector() {
             TypeKind::Vector
+        } else if typ.get_pointee().is_some() {
+            TypeKind::Pointer
+        } else if typ.dyncast_array().is_some() {
+            TypeKind::Array
+        } else if typ.is_struct().is_some() {
+            TypeKind::Struct
+        } else if typ.dyncast_function_ptr_type().is_some() {
+            TypeKind::Function
+        } else if typ == self.type_void() {
+            TypeKind::Void
         } else {
             // TODO(antoyo): support other types.
-            TypeKind::Void
+            unimplemented!();
         }
     }
@@ -200,6 +266,16 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         unimplemented!();
     }

+    #[cfg(feature = "master")]
+    fn float_width(&self, typ: Type<'gcc>) -> usize {
+        if typ.is_floating_point() {
+            (typ.get_size() * u8::BITS).try_into().unwrap()
+        } else {
+            panic!("Cannot get width of float type {:?}", typ);
+        }
+    }
+
+    #[cfg(not(feature = "master"))]
     fn float_width(&self, typ: Type<'gcc>) -> usize {
         let f32 = self.context.new_type::<f32>();
         let f64 = self.context.new_type::<f64>();
diff --git a/compiler/rustc_codegen_gcc/src/type_of.rs b/compiler/rustc_codegen_gcc/src/type_of.rs
index a88d50cb434..d85ed4f12ff 100644
--- a/compiler/rustc_codegen_gcc/src/type_of.rs
+++ b/compiler/rustc_codegen_gcc/src/type_of.rs
@@ -8,7 +8,7 @@ use rustc_middle::ty::print::with_no_trimmed_paths;
 use rustc_middle::ty::{self, CoroutineArgsExt, Ty, TypeVisitableExt};
 use rustc_target::abi::call::{CastTarget, FnAbi, Reg};
 use rustc_target::abi::{
-    self, Abi, Align, FieldsShape, Float, Int, Integer, PointeeInfo, Pointer, Size, TyAbiInterface,
+    self, Abi, FieldsShape, Float, Int, Integer, PointeeInfo, Pointer, Size, TyAbiInterface,
     Variants,
 };
@@ -53,12 +53,6 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
     }
 }

-impl<'a, 'tcx> CodegenCx<'a, 'tcx> {
-    pub fn align_of(&self, ty: Ty<'tcx>) -> Align {
-        self.layout_of(ty).align.abi
-    }
-}
-
 fn uncached_gcc_type<'gcc, 'tcx>(
     cx: &CodegenCx<'gcc, 'tcx>,
     layout: TyAndLayout<'tcx>,
@@ -90,7 +84,7 @@ fn uncached_gcc_type<'gcc, 'tcx>(
         Abi::Uninhabited | Abi::Aggregate { .. } => {}
     }

-    let name = match layout.ty.kind() {
+    let name = match *layout.ty.kind() {
         // FIXME(eddyb) producing readable type names for trait objects can result
         // in problematically distinct types due to HRTB and subtyping (see #47638).
         // ty::Dynamic(..) |
@@ -220,7 +214,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
                 // to fn_ptr_backend_type handle the on-stack attribute.
                 // TODO(antoyo): find a less hackish way to hande the on-stack attribute.
                 ty::FnPtr(sig) => {
                     cx.fn_ptr_backend_type(cx.fn_abi_of_fn_ptr(sig, ty::List::empty()))
                 }
                 _ => self.scalar_gcc_type_at(cx, scalar, Size::ZERO),
             };
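A note on the SyncContext change in lib.rs above: wrapping the raw gccjit context in a small newtype keeps the unsafe Send/Sync assertions in one place, while the Deref impl lets the rest of the backend keep using the inner context's API unchanged. The following is a minimal, self-contained sketch of that pattern; RawHandle and SyncHandle are illustrative stand-ins (not names from the patch), and the soundness caveat recorded in the FIXME still applies to the real code.

    use std::ops::Deref;
    use std::sync::Arc;

    // Stand-in for a non-thread-safe FFI handle; gccjit::Context plays this role in the patch.
    struct RawHandle {
        name: String,
    }

    // Newtype that owns the handle, so the unsafe Send/Sync claims live on one small type.
    struct SyncHandle {
        inner: RawHandle,
    }

    impl SyncHandle {
        fn new(inner: RawHandle) -> Self {
            Self { inner }
        }
    }

    impl Deref for SyncHandle {
        type Target = RawHandle;

        fn deref(&self) -> &Self::Target {
            &self.inner
        }
    }

    // SAFETY: only sound as long as the handle is never used from two threads at once;
    // the FIXME in the patch notes that this is not actually enforced yet.
    unsafe impl Send for SyncHandle {}
    unsafe impl Sync for SyncHandle {}

    fn main() {
        let handle = Arc::new(SyncHandle::new(RawHandle { name: "module".to_string() }));
        // Deref lets existing call sites keep reaching the inner handle's fields and methods.
        println!("handle name: {}", handle.name);
    }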
Note: the corrected call on the removed/added pair above is `-cx.fn_ptr_backend_type(&cx.fn_abi_of_fn_ptr(...))` / `+cx.fn_ptr_backend_type(cx.fn_abi_of_fn_ptr(...))`, i.e. the change drops the extra borrow now that fn_abi_of_fn_ptr already returns a reference.
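The master-only type_kind and float_width additions in type_.rs classify floating-point types by their size in bytes. A small sketch of that size-to-kind mapping and the bytes-to-bits arithmetic follows; FloatKind is a hypothetical stand-in for the relevant TypeKind variants, and the real code obtains the size from the gccjit type rather than taking it as a parameter.

    use std::convert::TryInto;

    // Local stand-in for the TypeKind variants used by the new float handling.
    #[derive(Debug, PartialEq)]
    enum FloatKind {
        Half,
        Float,
        Double,
        Fp128,
    }

    // Size in bytes -> float kind, mirroring the match added to the master-only type_kind.
    fn float_kind(size_in_bytes: u32) -> FloatKind {
        match size_in_bytes {
            2 => FloatKind::Half,
            4 => FloatKind::Float,
            8 => FloatKind::Double,
            16 => FloatKind::Fp128,
            size => unreachable!("floating-point type of size {}", size),
        }
    }

    // Width in bits, the same size * u8::BITS arithmetic as the master-only float_width.
    fn float_width(size_in_bytes: u32) -> usize {
        (size_in_bytes * u8::BITS).try_into().unwrap()
    }

    fn main() {
        assert_eq!(float_kind(2), FloatKind::Half);
        assert_eq!(float_kind(16), FloatKind::Fp128);
        assert_eq!(float_width(8), 64);
    }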