diff options
Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
35 files changed, 1366 insertions, 1241 deletions
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs index 4b07c8aef91..043123fcab2 100644 --- a/compiler/rustc_codegen_llvm/src/abi.rs +++ b/compiler/rustc_codegen_llvm/src/abi.rs @@ -24,6 +24,7 @@ use crate::attributes::{self, llfn_attrs_from_instance}; use crate::builder::Builder; use crate::context::CodegenCx; use crate::llvm::{self, Attribute, AttributePlace}; +use crate::llvm_util; use crate::type_::Type; use crate::type_of::LayoutLlvmExt; use crate::value::Value; @@ -146,7 +147,7 @@ impl LlvmType for CastTarget { "total size {:?} cannot be divided into units of zero size", self.rest.total ); - if self.rest.total.bytes() % self.rest.unit.size.bytes() != 0 { + if !self.rest.total.bytes().is_multiple_of(self.rest.unit.size.bytes()) { assert_eq!(self.rest.unit.kind, RegKind::Integer, "only int regs can be split"); } self.rest.total.bytes().div_ceil(self.rest.unit.size.bytes()) @@ -500,7 +501,16 @@ impl<'ll, 'tcx> FnAbiLlvmExt<'ll, 'tcx> for FnAbi<'tcx, Ty<'tcx>> { } } PassMode::Indirect { attrs, meta_attrs: None, on_stack: false } => { - apply(attrs); + let i = apply(attrs); + if cx.sess().opts.optimize != config::OptLevel::No + && llvm_util::get_version() >= (21, 0, 0) + { + attributes::apply_to_llfn( + llfn, + llvm::AttributePlace::Argument(i), + &[llvm::AttributeKind::DeadOnReturn.create_attr(cx.llcx)], + ); + } } PassMode::Indirect { attrs, meta_attrs: Some(meta_attrs), on_stack } => { assert!(!on_stack); diff --git a/compiler/rustc_codegen_llvm/src/allocator.rs b/compiler/rustc_codegen_llvm/src/allocator.rs index 9dca63cfc8d..2b5090ed6db 100644 --- a/compiler/rustc_codegen_llvm/src/allocator.rs +++ b/compiler/rustc_codegen_llvm/src/allocator.rs @@ -11,7 +11,7 @@ use rustc_symbol_mangling::mangle_internal_symbol; use crate::builder::SBuilder; use crate::declare::declare_simple_fn; -use crate::llvm::{self, False, True, Type}; +use crate::llvm::{self, False, True, Type, Value}; use crate::{SimpleCx, attributes, debuginfo}; pub(crate) unsafe fn codegen( @@ -73,13 +73,14 @@ pub(crate) unsafe fn codegen( ); unsafe { - // __rust_alloc_error_handler_should_panic - let name = mangle_internal_symbol(tcx, OomStrategy::SYMBOL); - let ll_g = cx.declare_global(&name, i8); - llvm::set_visibility(ll_g, llvm::Visibility::from_generic(tcx.sess.default_visibility())); - let val = tcx.sess.opts.unstable_opts.oom.should_panic(); - let llval = llvm::LLVMConstInt(i8, val as u64, False); - llvm::set_initializer(ll_g, llval); + // __rust_alloc_error_handler_should_panic_v2 + create_const_value_function( + tcx, + &cx, + &mangle_internal_symbol(tcx, OomStrategy::SYMBOL), + &i8, + &llvm::LLVMConstInt(i8, tcx.sess.opts.unstable_opts.oom.should_panic() as u64, False), + ); // __rust_no_alloc_shim_is_unstable_v2 create_wrapper_function( @@ -100,6 +101,34 @@ pub(crate) unsafe fn codegen( } } +fn create_const_value_function( + tcx: TyCtxt<'_>, + cx: &SimpleCx<'_>, + name: &str, + output: &Type, + value: &Value, +) { + let ty = cx.type_func(&[], output); + let llfn = declare_simple_fn( + &cx, + name, + llvm::CallConv::CCallConv, + llvm::UnnamedAddr::Global, + llvm::Visibility::from_generic(tcx.sess.default_visibility()), + ty, + ); + + attributes::apply_to_llfn( + llfn, + llvm::AttributePlace::Function, + &[llvm::AttributeKind::AlwaysInline.create_attr(cx.llcx)], + ); + + let llbb = unsafe { llvm::LLVMAppendBasicBlockInContext(cx.llcx, llfn, c"entry".as_ptr()) }; + let mut bx = SBuilder::build(&cx, llbb); + bx.ret(value); +} + fn create_wrapper_function( tcx: TyCtxt<'_>, cx: &SimpleCx<'_>, diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs index 9ddadcf16aa..a643a91141e 100644 --- a/compiler/rustc_codegen_llvm/src/asm.rs +++ b/compiler/rustc_codegen_llvm/src/asm.rs @@ -384,15 +384,19 @@ impl<'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> { ) { let asm_arch = self.tcx.sess.asm_arch.unwrap(); - // Default to Intel syntax on x86 - let intel_syntax = matches!(asm_arch, InlineAsmArch::X86 | InlineAsmArch::X86_64) - && !options.contains(InlineAsmOptions::ATT_SYNTAX); - // Build the template string let mut template_str = String::new(); - if intel_syntax { - template_str.push_str(".intel_syntax\n"); + + // On X86 platforms there are two assembly syntaxes. Rust uses intel by default, + // but AT&T can be specified explicitly. + if matches!(asm_arch, InlineAsmArch::X86 | InlineAsmArch::X86_64) { + if options.contains(InlineAsmOptions::ATT_SYNTAX) { + template_str.push_str(".att_syntax\n") + } else { + template_str.push_str(".intel_syntax\n") + } } + for piece in template { match *piece { InlineAsmTemplatePiece::String(ref s) => template_str.push_str(s), @@ -431,7 +435,11 @@ impl<'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> { } } } - if intel_syntax { + + // Just to play it safe, if intel was used, reset the assembly syntax to att. + if matches!(asm_arch, InlineAsmArch::X86 | InlineAsmArch::X86_64) + && !options.contains(InlineAsmOptions::ATT_SYNTAX) + { template_str.push_str("\n.att_syntax\n"); } diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index 27fd09745ff..c548f467583 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -1,6 +1,6 @@ //! Set and unset common attributes on LLVM values. -use rustc_attr_data_structures::{InlineAttr, InstructionSetAttr, OptimizeAttr}; use rustc_codegen_ssa::traits::*; +use rustc_hir::attrs::{InlineAttr, InstructionSetAttr, OptimizeAttr}; use rustc_hir::def_id::DefId; use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, PatchableFunctionEntry}; use rustc_middle::ty::{self, TyCtxt}; @@ -344,7 +344,7 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>( llfn: &'ll Value, instance: ty::Instance<'tcx>, ) { - let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id()); + let codegen_fn_attrs = cx.tcx.codegen_instance_attrs(instance.def); let mut to_add = SmallVec::<[_; 16]>::new(); @@ -370,22 +370,6 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>( }; to_add.extend(inline_attr(cx, inline)); - // The `uwtable` attribute according to LLVM is: - // - // This attribute indicates that the ABI being targeted requires that an - // unwind table entry be produced for this function even if we can show - // that no exceptions passes by it. This is normally the case for the - // ELF x86-64 abi, but it can be disabled for some compilation units. - // - // Typically when we're compiling with `-C panic=abort` (which implies this - // `no_landing_pads` check) we don't need `uwtable` because we can't - // generate any exceptions! On Windows, however, exceptions include other - // events such as illegal instructions, segfaults, etc. This means that on - // Windows we end up still needing the `uwtable` attribute even if the `-C - // panic=abort` flag is passed. - // - // You can also find more info on why Windows always requires uwtables here: - // https://bugzilla.mozilla.org/show_bug.cgi?id=1302078 if cx.sess().must_emit_unwind_tables() { to_add.push(uwtable_attr(cx.llcx, cx.sess().opts.unstable_opts.use_sync_unwind)); } @@ -491,11 +475,7 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>( let allocated_pointer = AttributeKind::AllocatedPointer.create_attr(cx.llcx); attributes::apply_to_llfn(llfn, AttributePlace::Argument(0), &[allocated_pointer]); } - // function alignment can be set globally with the `-Zmin-function-alignment=<n>` flag; - // the alignment from a `#[repr(align(<n>))]` is used if it specifies a higher alignment. - if let Some(align) = - Ord::max(cx.tcx.sess.opts.unstable_opts.min_function_alignment, codegen_fn_attrs.alignment) - { + if let Some(align) = codegen_fn_attrs.alignment { llvm::set_alignment(llfn, align); } if let Some(backchain) = backchain_attr(cx) { diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index ee46b49a094..c269f11e931 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -1,33 +1,29 @@ use std::collections::BTreeMap; use std::ffi::{CStr, CString}; use std::fs::File; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::ptr::NonNull; use std::sync::Arc; use std::{io, iter, slice}; use object::read::archive::ArchiveFile; -use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared}; -use rustc_codegen_ssa::back::symbol_export; +use object::{Object, ObjectSection}; +use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared}; use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput}; use rustc_codegen_ssa::traits::*; use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file}; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::memmap::Mmap; use rustc_errors::{DiagCtxtHandle, FatalError}; -use rustc_hir::def_id::LOCAL_CRATE; use rustc_middle::bug; use rustc_middle::dep_graph::WorkProduct; -use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel}; -use rustc_session::config::{self, CrateType, Lto}; +use rustc_session::config::{self, Lto}; use tracing::{debug, info}; use crate::back::write::{ self, CodegenDiagnosticsStage, DiagnosticHandlers, bitcode_section_name, save_temp_bitcode, }; -use crate::errors::{ - DynamicLinkingWithLTO, LlvmError, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib, LtoProcMacro, -}; +use crate::errors::{LlvmError, LtoBitcodeFromRlib}; use crate::llvm::AttributePlace::Function; use crate::llvm::{self, build_string}; use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx, attributes}; @@ -36,45 +32,21 @@ use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx, attributes}; /// session to determine which CGUs we can reuse. const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin"; -fn crate_type_allows_lto(crate_type: CrateType) -> bool { - match crate_type { - CrateType::Executable - | CrateType::Dylib - | CrateType::Staticlib - | CrateType::Cdylib - | CrateType::ProcMacro - | CrateType::Sdylib => true, - CrateType::Rlib => false, - } -} - fn prepare_lto( cgcx: &CodegenContext<LlvmCodegenBackend>, + exported_symbols_for_lto: &[String], + each_linked_rlib_for_lto: &[PathBuf], dcx: DiagCtxtHandle<'_>, ) -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError> { - let export_threshold = match cgcx.lto { - // We're just doing LTO for our one crate - Lto::ThinLocal => SymbolExportLevel::Rust, - - // We're doing LTO for the entire crate graph - Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types), - - Lto::No => panic!("didn't request LTO but we're doing LTO"), - }; + let mut symbols_below_threshold = exported_symbols_for_lto + .iter() + .map(|symbol| CString::new(symbol.to_owned()).unwrap()) + .collect::<Vec<CString>>(); - let symbol_filter = &|&(ref name, info): &(String, SymbolExportInfo)| { - if info.level.is_below_threshold(export_threshold) || info.used { - Some(CString::new(name.as_str()).unwrap()) - } else { - None - } - }; - let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO"); - let mut symbols_below_threshold = { - let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold"); - exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::<Vec<CString>>() - }; - info!("{} symbols to preserve in this crate", symbols_below_threshold.len()); + // __llvm_profile_counter_bias is pulled in at link time by an undefined reference to + // __llvm_profile_runtime, therefore we won't know until link time if this symbol + // should have default visibility. + symbols_below_threshold.push(c"__llvm_profile_counter_bias".to_owned()); // If we're performing LTO for the entire crate graph, then for each of our // upstream dependencies, find the corresponding rlib and load the bitcode @@ -84,37 +56,7 @@ fn prepare_lto( // with either fat or thin LTO let mut upstream_modules = Vec::new(); if cgcx.lto != Lto::ThinLocal { - // Make sure we actually can run LTO - for crate_type in cgcx.crate_types.iter() { - if !crate_type_allows_lto(*crate_type) { - dcx.emit_err(LtoDisallowed); - return Err(FatalError); - } else if *crate_type == CrateType::Dylib { - if !cgcx.opts.unstable_opts.dylib_lto { - dcx.emit_err(LtoDylib); - return Err(FatalError); - } - } else if *crate_type == CrateType::ProcMacro && !cgcx.opts.unstable_opts.dylib_lto { - dcx.emit_err(LtoProcMacro); - return Err(FatalError); - } - } - - if cgcx.opts.cg.prefer_dynamic && !cgcx.opts.unstable_opts.dylib_lto { - dcx.emit_err(DynamicLinkingWithLTO); - return Err(FatalError); - } - - for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() { - let exported_symbols = - cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO"); - { - let _timer = - cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold"); - symbols_below_threshold - .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter)); - } - + for path in each_linked_rlib_for_lto { let archive_data = unsafe { Mmap::map(std::fs::File::open(&path).expect("couldn't open rlib")) .expect("couldn't map rlib") @@ -147,10 +89,6 @@ fn prepare_lto( } } - // __llvm_profile_counter_bias is pulled in at link time by an undefined reference to - // __llvm_profile_runtime, therefore we won't know until link time if this symbol - // should have default visibility. - symbols_below_threshold.push(c"__llvm_profile_counter_bias".to_owned()); Ok((symbols_below_threshold, upstream_modules)) } @@ -168,46 +106,32 @@ fn get_bitcode_slice_from_object_data<'a>( // name" which in the public API for sections gets treated as part of the section name, but // internally in MachOObjectFile.cpp gets treated separately. let section_name = bitcode_section_name(cgcx).to_str().unwrap().trim_start_matches("__LLVM,"); - let mut len = 0; - let data = unsafe { - llvm::LLVMRustGetSliceFromObjectDataByName( - obj.as_ptr(), - obj.len(), - section_name.as_ptr(), - section_name.len(), - &mut len, - ) - }; - if !data.is_null() { - assert!(len != 0); - let bc = unsafe { slice::from_raw_parts(data, len) }; - // `bc` must be a sub-slice of `obj`. - assert!(obj.as_ptr() <= bc.as_ptr()); - assert!(bc[bc.len()..bc.len()].as_ptr() <= obj[obj.len()..obj.len()].as_ptr()); + let obj = + object::File::parse(obj).map_err(|err| LtoBitcodeFromRlib { err: err.to_string() })?; - Ok(bc) - } else { - assert!(len == 0); - Err(LtoBitcodeFromRlib { - llvm_err: llvm::last_error().unwrap_or_else(|| "unknown LLVM error".to_string()), - }) - } + let section = obj + .section_by_name(section_name) + .ok_or_else(|| LtoBitcodeFromRlib { err: format!("Can't find section {section_name}") })?; + + section.data().map_err(|err| LtoBitcodeFromRlib { err: err.to_string() }) } /// Performs fat LTO by merging all modules into a single one and returning it /// for further optimization. pub(crate) fn run_fat( cgcx: &CodegenContext<LlvmCodegenBackend>, + exported_symbols_for_lto: &[String], + each_linked_rlib_for_lto: &[PathBuf], modules: Vec<FatLtoInput<LlvmCodegenBackend>>, - cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, -) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> { +) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> { let dcx = cgcx.create_dcx(); let dcx = dcx.handle(); - let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, dcx)?; + let (symbols_below_threshold, upstream_modules) = + prepare_lto(cgcx, exported_symbols_for_lto, each_linked_rlib_for_lto, dcx)?; let symbols_below_threshold = symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>(); - fat_lto(cgcx, dcx, modules, cached_modules, upstream_modules, &symbols_below_threshold) + fat_lto(cgcx, dcx, modules, upstream_modules, &symbols_below_threshold) } /// Performs thin LTO by performing necessary global analysis and returning two @@ -215,12 +139,15 @@ pub(crate) fn run_fat( /// can simply be copied over from the incr. comp. cache. pub(crate) fn run_thin( cgcx: &CodegenContext<LlvmCodegenBackend>, + exported_symbols_for_lto: &[String], + each_linked_rlib_for_lto: &[PathBuf], modules: Vec<(String, ThinBuffer)>, cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, -) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> { +) -> Result<(Vec<ThinModule<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> { let dcx = cgcx.create_dcx(); let dcx = dcx.handle(); - let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, dcx)?; + let (symbols_below_threshold, upstream_modules) = + prepare_lto(cgcx, exported_symbols_for_lto, each_linked_rlib_for_lto, dcx)?; let symbols_below_threshold = symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>(); if cgcx.opts.cg.linker_plugin_lto.enabled() { @@ -245,10 +172,9 @@ fn fat_lto( cgcx: &CodegenContext<LlvmCodegenBackend>, dcx: DiagCtxtHandle<'_>, modules: Vec<FatLtoInput<LlvmCodegenBackend>>, - cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>, symbols_below_threshold: &[*const libc::c_char], -) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> { +) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> { let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module"); info!("going for a fat lto"); @@ -258,21 +184,12 @@ fn fat_lto( // modules that are serialized in-memory. // * `in_memory` contains modules which are already parsed and in-memory, // such as from multi-CGU builds. - // - // All of `cached_modules` (cached from previous incremental builds) can - // immediately go onto the `serialized_modules` modules list and then we can - // split the `modules` array into these two lists. let mut in_memory = Vec::new(); - serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| { - info!("pushing cached module {:?}", wp.cgu_name); - (buffer, CString::new(wp.cgu_name).unwrap()) - })); for module in modules { match module { FatLtoInput::InMemory(m) => in_memory.push(m), FatLtoInput::Serialized { name, buffer } => { info!("pushing serialized module {:?}", name); - let buffer = SerializedModule::Local(buffer); serialized_modules.push((buffer, CString::new(name).unwrap())); } } @@ -366,7 +283,7 @@ fn fat_lto( save_temp_bitcode(cgcx, &module, "lto.after-restriction"); } - Ok(LtoModuleCodegen::Fat(module)) + Ok(module) } pub(crate) struct Linker<'a>(&'a mut llvm::Linker<'a>); @@ -436,7 +353,7 @@ fn thin_lto( serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>, cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, symbols_below_threshold: &[*const libc::c_char], -) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> { +) -> Result<(Vec<ThinModule<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> { let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis"); unsafe { info!("going for that thin, thin LTO"); @@ -568,18 +485,15 @@ fn thin_lto( } info!(" - {}: re-compiled", module_name); - opt_jobs.push(LtoModuleCodegen::Thin(ThinModule { - shared: Arc::clone(&shared), - idx: module_index, - })); + opt_jobs.push(ThinModule { shared: Arc::clone(&shared), idx: module_index }); } // Save the current ThinLTO import information for the next compilation // session, overwriting the previous serialized data (if any). - if let Some(path) = key_map_path { - if let Err(err) = curr_key_map.save_to_file(&path) { - return Err(write::llvm_err(dcx, LlvmError::WriteThinLtoKey { err })); - } + if let Some(path) = key_map_path + && let Err(err) = curr_key_map.save_to_file(&path) + { + return Err(write::llvm_err(dcx, LlvmError::WriteThinLtoKey { err })); } Ok((opt_jobs, copy_jobs)) @@ -587,7 +501,7 @@ fn thin_lto( } fn enable_autodiff_settings(ad: &[config::AutoDiff]) { - for &val in ad { + for val in ad { // We intentionally don't use a wildcard, to not forget handling anything new. match val { config::AutoDiff::PrintPerf => { @@ -599,6 +513,10 @@ fn enable_autodiff_settings(ad: &[config::AutoDiff]) { config::AutoDiff::PrintTA => { llvm::set_print_type(true); } + config::AutoDiff::PrintTAFn(fun) => { + llvm::set_print_type(true); // Enable general type printing + llvm::set_print_type_fun(&fun); // Set specific function to analyze + } config::AutoDiff::Inline => { llvm::set_inline(true); } @@ -653,6 +571,7 @@ pub(crate) fn run_pass_manager( // We then run the llvm_optimize function a second time, to optimize the code which we generated // in the enzyme differentiation pass. let enable_ad = config.autodiff.contains(&config::AutoDiff::Enable); + let enable_gpu = config.offload.contains(&config::Offload::Enable); let stage = if thin { write::AutodiffStage::PreAD } else { @@ -667,6 +586,12 @@ pub(crate) fn run_pass_manager( write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?; } + if enable_gpu && !thin { + let cx = + SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size); + crate::builder::gpu_offload::handle_gpu_code(cgcx, &cx); + } + if cfg!(llvm_enzyme) && enable_ad && !thin { let cx = SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size); @@ -676,7 +601,7 @@ pub(crate) fn run_pass_manager( if attributes::has_string_attr(function, enzyme_marker) { // Sanity check: Ensure 'noinline' is present before replacing it. assert!( - !attributes::has_attr(function, Function, llvm::AttributeKind::NoInline), + attributes::has_attr(function, Function, llvm::AttributeKind::NoInline), "Expected __enzyme function to have 'noinline' before adding 'alwaysinline'" ); diff --git a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs index dfde4595590..8e82013e94a 100644 --- a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs +++ b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs @@ -39,6 +39,7 @@ impl OwnedTargetMachine { debug_info_compression: &CStr, use_emulated_tls: bool, args_cstr_buff: &[u8], + use_wasm_eh: bool, ) -> Result<Self, LlvmError<'static>> { assert!(args_cstr_buff.len() > 0); assert!( @@ -72,6 +73,7 @@ impl OwnedTargetMachine { use_emulated_tls, args_cstr_buff.as_ptr() as *const c_char, args_cstr_buff.len(), + use_wasm_eh, ) }; diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index bde6a9cf4bc..85a06f457eb 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -15,6 +15,7 @@ use rustc_codegen_ssa::back::write::{ BitcodeSection, CodegenContext, EmitObj, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn, }; +use rustc_codegen_ssa::base::wants_wasm_eh; use rustc_codegen_ssa::traits::*; use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind}; use rustc_data_structures::profiling::SelfProfilerRef; @@ -285,6 +286,8 @@ pub(crate) fn target_machine_factory( let file_name_display_preference = sess.filename_display_preference(RemapPathScopeComponents::DEBUGINFO); + let use_wasm_eh = wants_wasm_eh(sess); + Arc::new(move |config: TargetMachineFactoryConfig| { let path_to_cstring_helper = |path: Option<PathBuf>| -> CString { let path = path.unwrap_or_default(); @@ -321,6 +324,7 @@ pub(crate) fn target_machine_factory( &debuginfo_compression, use_emulated_tls, &args_cstr_buff, + use_wasm_eh, ) }) } @@ -792,35 +796,14 @@ pub(crate) fn optimize( Ok(()) } -pub(crate) fn link( - cgcx: &CodegenContext<LlvmCodegenBackend>, - dcx: DiagCtxtHandle<'_>, - mut modules: Vec<ModuleCodegen<ModuleLlvm>>, -) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> { - use super::lto::{Linker, ModuleBuffer}; - // Sort the modules by name to ensure deterministic behavior. - modules.sort_by(|a, b| a.name.cmp(&b.name)); - let (first, elements) = - modules.split_first().expect("Bug! modules must contain at least one module."); - - let mut linker = Linker::new(first.module_llvm.llmod()); - for module in elements { - let _timer = cgcx.prof.generic_activity_with_arg("LLVM_link_module", &*module.name); - let buffer = ModuleBuffer::new(module.module_llvm.llmod()); - linker - .add(buffer.data()) - .map_err(|()| llvm_err(dcx, LlvmError::SerializeModule { name: &module.name }))?; - } - drop(linker); - Ok(modules.remove(0)) -} - pub(crate) fn codegen( cgcx: &CodegenContext<LlvmCodegenBackend>, - dcx: DiagCtxtHandle<'_>, module: ModuleCodegen<ModuleLlvm>, config: &ModuleConfig, ) -> Result<CompiledModule, FatalError> { + let dcx = cgcx.create_dcx(); + let dcx = dcx.handle(); + let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_codegen", &*module.name); { let llmod = module.module_llvm.llmod(); @@ -879,9 +862,7 @@ pub(crate) fn codegen( .generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name); let thin_bc = module.thin_lto_buffer.as_deref().expect("cannot find embedded bitcode"); - unsafe { - embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &thin_bc); - } + embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &thin_bc); } } @@ -945,7 +926,7 @@ pub(crate) fn codegen( // binaries. So we must clone the module to produce the asm output // if we are also producing object code. let llmod = if let EmitObj::ObjectCode(_) = config.emit_obj { - unsafe { llvm::LLVMCloneModule(llmod) } + llvm::LLVMCloneModule(llmod) } else { llmod }; @@ -1073,7 +1054,7 @@ pub(crate) fn bitcode_section_name(cgcx: &CodegenContext<LlvmCodegenBackend>) -> } /// Embed the bitcode of an LLVM module for LTO in the LLVM module itself. -unsafe fn embed_bitcode( +fn embed_bitcode( cgcx: &CodegenContext<LlvmCodegenBackend>, llcx: &llvm::Context, llmod: &llvm::Module, @@ -1115,43 +1096,40 @@ unsafe fn embed_bitcode( // Unfortunately, LLVM provides no way to set custom section flags. For ELF // and COFF we emit the sections using module level inline assembly for that // reason (see issue #90326 for historical background). - unsafe { - if cgcx.target_is_like_darwin - || cgcx.target_is_like_aix - || cgcx.target_arch == "wasm32" - || cgcx.target_arch == "wasm64" - { - // We don't need custom section flags, create LLVM globals. - let llconst = common::bytes_in_context(llcx, bitcode); - let llglobal = - llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.module"); - llvm::set_initializer(llglobal, llconst); - - llvm::set_section(llglobal, bitcode_section_name(cgcx)); - llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage); - llvm::LLVMSetGlobalConstant(llglobal, llvm::True); - - let llconst = common::bytes_in_context(llcx, cmdline.as_bytes()); - let llglobal = - llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.cmdline"); - llvm::set_initializer(llglobal, llconst); - let section = if cgcx.target_is_like_darwin { - c"__LLVM,__cmdline" - } else if cgcx.target_is_like_aix { - c".info" - } else { - c".llvmcmd" - }; - llvm::set_section(llglobal, section); - llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage); + + if cgcx.target_is_like_darwin + || cgcx.target_is_like_aix + || cgcx.target_arch == "wasm32" + || cgcx.target_arch == "wasm64" + { + // We don't need custom section flags, create LLVM globals. + let llconst = common::bytes_in_context(llcx, bitcode); + let llglobal = llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.module"); + llvm::set_initializer(llglobal, llconst); + + llvm::set_section(llglobal, bitcode_section_name(cgcx)); + llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage); + llvm::LLVMSetGlobalConstant(llglobal, llvm::True); + + let llconst = common::bytes_in_context(llcx, cmdline.as_bytes()); + let llglobal = llvm::add_global(llmod, common::val_ty(llconst), c"rustc.embedded.cmdline"); + llvm::set_initializer(llglobal, llconst); + let section = if cgcx.target_is_like_darwin { + c"__LLVM,__cmdline" + } else if cgcx.target_is_like_aix { + c".info" } else { - // We need custom section flags, so emit module-level inline assembly. - let section_flags = if cgcx.is_pe_coff { "n" } else { "e" }; - let asm = create_section_with_flags_asm(".llvmbc", section_flags, bitcode); - llvm::append_module_inline_asm(llmod, &asm); - let asm = create_section_with_flags_asm(".llvmcmd", section_flags, cmdline.as_bytes()); - llvm::append_module_inline_asm(llmod, &asm); - } + c".llvmcmd" + }; + llvm::set_section(llglobal, section); + llvm::set_linkage(llglobal, llvm::Linkage::PrivateLinkage); + } else { + // We need custom section flags, so emit module-level inline assembly. + let section_flags = if cgcx.is_pe_coff { "n" } else { "e" }; + let asm = create_section_with_flags_asm(".llvmbc", section_flags, bitcode); + llvm::append_module_inline_asm(llmod, &asm); + let asm = create_section_with_flags_asm(".llvmcmd", section_flags, cmdline.as_bytes()); + llvm::append_module_inline_asm(llmod, &asm); } } @@ -1182,7 +1160,7 @@ fn create_msvc_imps( .filter_map(|val| { // Exclude some symbols that we know are not Rust symbols. let name = llvm::get_value_name(val); - if ignored(name) { None } else { Some((val, name)) } + if ignored(&name) { None } else { Some((val, name)) } }) .map(move |(val, name)| { let mut imp_name = prefix.as_bytes().to_vec(); diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 5e9594dd06b..917d07e3c61 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -3,6 +3,7 @@ use std::ops::Deref; use std::{iter, ptr}; pub(crate) mod autodiff; +pub(crate) mod gpu_offload; use libc::{c_char, c_uint, size_t}; use rustc_abi as abi; @@ -14,6 +15,7 @@ use rustc_codegen_ssa::mir::place::PlaceRef; use rustc_codegen_ssa::traits::*; use rustc_data_structures::small_c_str::SmallCStr; use rustc_hir::def_id::DefId; +use rustc_middle::bug; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; use rustc_middle::ty::layout::{ FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTypingEnv, LayoutError, LayoutOfHelpers, @@ -23,7 +25,7 @@ use rustc_middle::ty::{self, Instance, Ty, TyCtxt}; use rustc_sanitizers::{cfi, kcfi}; use rustc_session::config::OptLevel; use rustc_span::Span; -use rustc_target::callconv::FnAbi; +use rustc_target::callconv::{FnAbi, PassMode}; use rustc_target::spec::{HasTargetSpec, SanitizerSet, Target}; use smallvec::SmallVec; use tracing::{debug, instrument}; @@ -117,6 +119,74 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> { } bx } + + // The generic builder has less functionality and thus (unlike the other alloca) we can not + // easily jump to the beginning of the function to place our allocas there. We trust the user + // to manually do that. FIXME(offload): improve the genericCx and add more llvm wrappers to + // handle this. + pub(crate) fn direct_alloca(&mut self, ty: &'ll Type, align: Align, name: &str) -> &'ll Value { + let val = unsafe { + let alloca = llvm::LLVMBuildAlloca(self.llbuilder, ty, UNNAMED); + llvm::LLVMSetAlignment(alloca, align.bytes() as c_uint); + // Cast to default addrspace if necessary + llvm::LLVMBuildPointerCast(self.llbuilder, alloca, self.cx.type_ptr(), UNNAMED) + }; + if name != "" { + let name = std::ffi::CString::new(name).unwrap(); + llvm::set_value_name(val, &name.as_bytes()); + } + val + } + + pub(crate) fn inbounds_gep( + &mut self, + ty: &'ll Type, + ptr: &'ll Value, + indices: &[&'ll Value], + ) -> &'ll Value { + unsafe { + llvm::LLVMBuildGEPWithNoWrapFlags( + self.llbuilder, + ty, + ptr, + indices.as_ptr(), + indices.len() as c_uint, + UNNAMED, + GEPNoWrapFlags::InBounds, + ) + } + } + + pub(crate) fn store(&mut self, val: &'ll Value, ptr: &'ll Value, align: Align) -> &'ll Value { + debug!("Store {:?} -> {:?}", val, ptr); + assert_eq!(self.cx.type_kind(self.cx.val_ty(ptr)), TypeKind::Pointer); + unsafe { + let store = llvm::LLVMBuildStore(self.llbuilder, val, ptr); + llvm::LLVMSetAlignment(store, align.bytes() as c_uint); + store + } + } + + pub(crate) fn load(&mut self, ty: &'ll Type, ptr: &'ll Value, align: Align) -> &'ll Value { + unsafe { + let load = llvm::LLVMBuildLoad2(self.llbuilder, ty, ptr, UNNAMED); + llvm::LLVMSetAlignment(load, align.bytes() as c_uint); + load + } + } + + fn memset(&mut self, ptr: &'ll Value, fill_byte: &'ll Value, size: &'ll Value, align: Align) { + unsafe { + llvm::LLVMRustBuildMemSet( + self.llbuilder, + ptr, + align.bytes() as c_uint, + fill_byte, + size, + false, + ); + } + } } /// Empty string, to be used where LLVM expects an instruction name, indicating @@ -302,10 +372,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { return; } - let id_str = "branch_weights"; - let id = unsafe { - llvm::LLVMMDStringInContext2(self.cx.llcx, id_str.as_ptr().cast(), id_str.len()) - }; + let id = self.cx.create_metadata(b"branch_weights"); // For switch instructions with 2 targets, the `llvm.expect` intrinsic is used. // This function handles switch instructions with more than 2 targets and it needs to @@ -490,13 +557,25 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { let (size, signed) = ty.int_size_and_signed(self.tcx); let width = size.bits(); - if oop == OverflowOp::Sub && !signed { - // Emit sub and icmp instead of llvm.usub.with.overflow. LLVM considers these - // to be the canonical form. It will attempt to reform llvm.usub.with.overflow - // in the backend if profitable. - let sub = self.sub(lhs, rhs); - let cmp = self.icmp(IntPredicate::IntULT, lhs, rhs); - return (sub, cmp); + if !signed { + match oop { + OverflowOp::Sub => { + // Emit sub and icmp instead of llvm.usub.with.overflow. LLVM considers these + // to be the canonical form. It will attempt to reform llvm.usub.with.overflow + // in the backend if profitable. + let sub = self.sub(lhs, rhs); + let cmp = self.icmp(IntPredicate::IntULT, lhs, rhs); + return (sub, cmp); + } + OverflowOp::Add => { + // Like with sub above, using icmp is the preferred form. See + // <https://rust-lang.zulipchat.com/#narrow/channel/187780-t-compiler.2Fllvm/topic/.60uadd.2Ewith.2Eoverflow.60.20.28again.29/near/533041085> + let add = self.add(lhs, rhs); + let cmp = self.icmp(IntPredicate::IntULT, add, lhs); + return (add, cmp); + } + OverflowOp::Mul => {} + } } let oop_str = match oop { @@ -538,16 +617,6 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { } } - fn dynamic_alloca(&mut self, size: &'ll Value, align: Align) -> &'ll Value { - unsafe { - let alloca = - llvm::LLVMBuildArrayAlloca(self.llbuilder, self.cx().type_i8(), size, UNNAMED); - llvm::LLVMSetAlignment(alloca, align.bytes() as c_uint); - // Cast to default addrspace if necessary - llvm::LLVMBuildPointerCast(self.llbuilder, alloca, self.cx().type_ptr(), UNNAMED) - } - } - fn load(&mut self, ty: &'ll Type, ptr: &'ll Value, align: Align) -> &'ll Value { unsafe { let load = llvm::LLVMBuildLoad2(self.llbuilder, ty, ptr, UNNAMED); @@ -631,10 +700,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { bx.nonnull_metadata(load); } - if let Some(pointee) = layout.pointee_info_at(bx, offset) { - if let Some(_) = pointee.safe { - bx.align_metadata(load, pointee.align); - } + if let Some(pointee) = layout.pointee_info_at(bx, offset) + && let Some(_) = pointee.safe + { + bx.align_metadata(load, pointee.align); } } abi::Primitive::Float(_) => {} @@ -647,17 +716,16 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { } else if place.layout.is_llvm_immediate() { let mut const_llval = None; let llty = place.layout.llvm_type(self); - unsafe { - if let Some(global) = llvm::LLVMIsAGlobalVariable(place.val.llval) { - if llvm::LLVMIsGlobalConstant(global) == llvm::True { - if let Some(init) = llvm::LLVMGetInitializer(global) { - if self.val_ty(init) == llty { - const_llval = Some(init); - } + if let Some(global) = llvm::LLVMIsAGlobalVariable(place.val.llval) { + if llvm::LLVMIsGlobalConstant(global) == llvm::True { + if let Some(init) = llvm::LLVMGetInitializer(global) { + if self.val_ty(init) == llty { + const_llval = Some(init); } } } } + let llval = const_llval.unwrap_or_else(|| { let load = self.load(llty, place.val.llval, place.val.align); if let abi::BackendRepr::Scalar(scalar) = place.layout.backend_repr { @@ -1166,11 +1234,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { (self.extract_value(landing_pad, 0), self.extract_value(landing_pad, 1)) } - fn filter_landing_pad(&mut self, pers_fn: &'ll Value) -> (&'ll Value, &'ll Value) { + fn filter_landing_pad(&mut self, pers_fn: &'ll Value) { let ty = self.type_struct(&[self.type_ptr(), self.type_i32()], false); let landing_pad = self.landing_pad(ty, pers_fn, 1); self.add_clause(landing_pad, self.const_array(self.type_ptr(), &[])); - (self.extract_value(landing_pad, 0), self.extract_value(landing_pad, 1)) } fn resume(&mut self, exn0: &'ll Value, exn1: &'ll Value) { @@ -1272,15 +1339,13 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { &mut self, op: rustc_codegen_ssa::common::AtomicRmwBinOp, dst: &'ll Value, - mut src: &'ll Value, + src: &'ll Value, order: rustc_middle::ty::AtomicOrdering, + ret_ptr: bool, ) -> &'ll Value { - // The only RMW operation that LLVM supports on pointers is compare-exchange. - let requires_cast_to_int = self.val_ty(src) == self.type_ptr() - && op != rustc_codegen_ssa::common::AtomicRmwBinOp::AtomicXchg; - if requires_cast_to_int { - src = self.ptrtoint(src, self.type_isize()); - } + // FIXME: If `ret_ptr` is true and `src` is not a pointer, we *should* tell LLVM that the + // LHS is a pointer and the operation should be provenance-preserving, but LLVM does not + // currently support that (https://github.com/llvm/llvm-project/issues/120837). let mut res = unsafe { llvm::LLVMBuildAtomicRMW( self.llbuilder, @@ -1291,7 +1356,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { llvm::False, // SingleThreaded ) }; - if requires_cast_to_int { + if ret_ptr && self.val_ty(res) != self.type_ptr() { res = self.inttoptr(res, self.type_ptr()); } res @@ -1377,6 +1442,28 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { call } + fn tail_call( + &mut self, + llty: Self::Type, + fn_attrs: Option<&CodegenFnAttrs>, + fn_abi: &FnAbi<'tcx, Ty<'tcx>>, + llfn: Self::Value, + args: &[Self::Value], + funclet: Option<&Self::Funclet>, + instance: Option<Instance<'tcx>>, + ) { + let call = self.call(llty, fn_attrs, Some(fn_abi), llfn, args, funclet, instance); + llvm::LLVMRustSetTailCallKind(call, llvm::TailCallKind::MustTail); + + match &fn_abi.ret.mode { + PassMode::Ignore | PassMode::Indirect { .. } => self.ret_void(), + PassMode::Direct(_) | PassMode::Pair { .. } => self.ret(call), + mode @ PassMode::Cast { .. } => { + bug!("Encountered `PassMode::{mode:?}` during codegen") + } + } + } + fn zext(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value { unsafe { llvm::LLVMBuildZExt(self.llbuilder, val, dest_ty, UNNAMED) } } @@ -1732,7 +1819,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> { } else { cfi::typeid_for_fnabi(self.tcx, fn_abi, options) }; - let typeid_metadata = self.cx.typeid_metadata(typeid).unwrap(); + let typeid_metadata = self.cx.create_metadata(typeid.as_bytes()); let dbg_loc = self.get_dbg_loc(); // Test whether the function pointer is associated with the type identifier using the @@ -1809,48 +1896,4 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> { ) { self.call_intrinsic("llvm.instrprof.increment", &[], &[fn_name, hash, num_counters, index]); } - - /// Emits a call to `llvm.instrprof.mcdc.parameters`. - /// - /// This doesn't produce any code directly, but is used as input by - /// the LLVM pass that handles coverage instrumentation. - /// - /// (See clang's [`CodeGenPGO::emitMCDCParameters`] for comparison.) - /// - /// [`CodeGenPGO::emitMCDCParameters`]: - /// https://github.com/rust-lang/llvm-project/blob/5399a24/clang/lib/CodeGen/CodeGenPGO.cpp#L1124 - #[instrument(level = "debug", skip(self))] - pub(crate) fn mcdc_parameters( - &mut self, - fn_name: &'ll Value, - hash: &'ll Value, - bitmap_bits: &'ll Value, - ) { - self.call_intrinsic("llvm.instrprof.mcdc.parameters", &[], &[fn_name, hash, bitmap_bits]); - } - - #[instrument(level = "debug", skip(self))] - pub(crate) fn mcdc_tvbitmap_update( - &mut self, - fn_name: &'ll Value, - hash: &'ll Value, - bitmap_index: &'ll Value, - mcdc_temp: &'ll Value, - ) { - let args = &[fn_name, hash, bitmap_index, mcdc_temp]; - self.call_intrinsic("llvm.instrprof.mcdc.tvbitmap.update", &[], args); - } - - #[instrument(level = "debug", skip(self))] - pub(crate) fn mcdc_condbitmap_reset(&mut self, mcdc_temp: &'ll Value) { - self.store(self.const_i32(0), mcdc_temp, self.tcx.data_layout.i32_align.abi); - } - - #[instrument(level = "debug", skip(self))] - pub(crate) fn mcdc_condbitmap_update(&mut self, cond_index: &'ll Value, mcdc_temp: &'ll Value) { - let align = self.tcx.data_layout.i32_align.abi; - let current_tv_index = self.load(self.cx.type_i32(), mcdc_temp, align); - let new_tv_index = self.add(current_tv_index, cond_index); - self.store(new_tv_index, mcdc_temp, align); - } } diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs index b07d9a5cfca..829b3c513c2 100644 --- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs +++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs @@ -2,7 +2,6 @@ use std::ptr; use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode}; use rustc_codegen_ssa::ModuleCodegen; -use rustc_codegen_ssa::back::write::ModuleConfig; use rustc_codegen_ssa::common::TypeKind; use rustc_codegen_ssa::traits::BaseTypeCodegenMethods; use rustc_errors::FatalError; @@ -76,12 +75,12 @@ fn match_args_from_caller_to_enzyme<'ll>( outer_pos = 1; } - let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap(); - let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap(); - let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap(); - let enzyme_dupv = cx.create_metadata("enzyme_dupv".to_string()).unwrap(); - let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap(); - let enzyme_dupnoneedv = cx.create_metadata("enzyme_dupnoneedv".to_string()).unwrap(); + let enzyme_const = cx.create_metadata(b"enzyme_const"); + let enzyme_out = cx.create_metadata(b"enzyme_out"); + let enzyme_dup = cx.create_metadata(b"enzyme_dup"); + let enzyme_dupv = cx.create_metadata(b"enzyme_dupv"); + let enzyme_dupnoneed = cx.create_metadata(b"enzyme_dupnoneed"); + let enzyme_dupnoneedv = cx.create_metadata(b"enzyme_dupnoneedv"); while activity_pos < inputs.len() { let diff_activity = inputs[activity_pos as usize]; @@ -306,7 +305,7 @@ fn generate_enzyme_call<'ll>( // add outer_fn name to ad_name to make it unique, in case users apply autodiff to multiple // functions. Unwrap will only panic, if LLVM gave us an invalid string. let name = llvm::get_value_name(outer_fn); - let outer_fn_name = std::str::from_utf8(name).unwrap(); + let outer_fn_name = std::str::from_utf8(&name).unwrap(); ad_name.push_str(outer_fn_name); // Let us assume the user wrote the following function square: @@ -378,12 +377,12 @@ fn generate_enzyme_call<'ll>( let mut args = Vec::with_capacity(num_args as usize + 1); args.push(fn_to_diff); - let enzyme_primal_ret = cx.create_metadata("enzyme_primal_return".to_string()).unwrap(); + let enzyme_primal_ret = cx.create_metadata(b"enzyme_primal_return"); if matches!(attrs.ret_activity, DiffActivity::Dual | DiffActivity::Active) { args.push(cx.get_metadata_value(enzyme_primal_ret)); } if attrs.width > 1 { - let enzyme_width = cx.create_metadata("enzyme_width".to_string()).unwrap(); + let enzyme_width = cx.create_metadata(b"enzyme_width"); args.push(cx.get_metadata_value(enzyme_width)); args.push(cx.get_const_int(cx.type_i64(), attrs.width as u64)); } @@ -461,7 +460,6 @@ pub(crate) fn differentiate<'ll>( module: &'ll ModuleCodegen<ModuleLlvm>, cgcx: &CodegenContext<LlvmCodegenBackend>, diff_items: Vec<AutoDiffItem>, - _config: &ModuleConfig, ) -> Result<(), FatalError> { for item in &diff_items { trace!("{}", item); diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs new file mode 100644 index 00000000000..1280ab1442a --- /dev/null +++ b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs @@ -0,0 +1,439 @@ +use std::ffi::CString; + +use llvm::Linkage::*; +use rustc_abi::Align; +use rustc_codegen_ssa::back::write::CodegenContext; +use rustc_codegen_ssa::traits::BaseTypeCodegenMethods; + +use crate::builder::SBuilder; +use crate::common::AsCCharPtr; +use crate::llvm::AttributePlace::Function; +use crate::llvm::{self, Linkage, Type, Value}; +use crate::{LlvmCodegenBackend, SimpleCx, attributes}; + +pub(crate) fn handle_gpu_code<'ll>( + _cgcx: &CodegenContext<LlvmCodegenBackend>, + cx: &'ll SimpleCx<'_>, +) { + // The offload memory transfer type for each kernel + let mut o_types = vec![]; + let mut kernels = vec![]; + let offload_entry_ty = add_tgt_offload_entry(&cx); + for num in 0..9 { + let kernel = cx.get_function(&format!("kernel_{num}")); + if let Some(kernel) = kernel { + o_types.push(gen_define_handling(&cx, kernel, offload_entry_ty, num)); + kernels.push(kernel); + } + } + + gen_call_handling(&cx, &kernels, &o_types); +} + +// What is our @1 here? A magic global, used in our data_{begin/update/end}_mapper: +// @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +// @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +fn generate_at_one<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Value { + // @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 + let unknown_txt = ";unknown;unknown;0;0;;"; + let c_entry_name = CString::new(unknown_txt).unwrap(); + let c_val = c_entry_name.as_bytes_with_nul(); + let initializer = crate::common::bytes_in_context(cx.llcx, c_val); + let at_zero = add_unnamed_global(&cx, &"", initializer, PrivateLinkage); + llvm::set_alignment(at_zero, Align::ONE); + + // @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 + let struct_ident_ty = cx.type_named_struct("struct.ident_t"); + let struct_elems = vec![ + cx.get_const_i32(0), + cx.get_const_i32(2), + cx.get_const_i32(0), + cx.get_const_i32(22), + at_zero, + ]; + let struct_elems_ty: Vec<_> = struct_elems.iter().map(|&x| cx.val_ty(x)).collect(); + let initializer = crate::common::named_struct(struct_ident_ty, &struct_elems); + cx.set_struct_body(struct_ident_ty, &struct_elems_ty, false); + let at_one = add_unnamed_global(&cx, &"", initializer, PrivateLinkage); + llvm::set_alignment(at_one, Align::EIGHT); + at_one +} + +pub(crate) fn add_tgt_offload_entry<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Type { + let offload_entry_ty = cx.type_named_struct("struct.__tgt_offload_entry"); + let tptr = cx.type_ptr(); + let ti64 = cx.type_i64(); + let ti32 = cx.type_i32(); + let ti16 = cx.type_i16(); + // For each kernel to run on the gpu, we will later generate one entry of this type. + // copied from LLVM + // typedef struct { + // uint64_t Reserved; + // uint16_t Version; + // uint16_t Kind; + // uint32_t Flags; Flags associated with the entry (see Target Region Entry Flags) + // void *Address; Address of global symbol within device image (function or global) + // char *SymbolName; + // uint64_t Size; Size of the entry info (0 if it is a function) + // uint64_t Data; + // void *AuxAddr; + // } __tgt_offload_entry; + let entry_elements = vec![ti64, ti16, ti16, ti32, tptr, tptr, ti64, ti64, tptr]; + cx.set_struct_body(offload_entry_ty, &entry_elements, false); + offload_entry_ty +} + +fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) { + let kernel_arguments_ty = cx.type_named_struct("struct.__tgt_kernel_arguments"); + let tptr = cx.type_ptr(); + let ti64 = cx.type_i64(); + let ti32 = cx.type_i32(); + let tarr = cx.type_array(ti32, 3); + + // Taken from the LLVM APITypes.h declaration: + //struct KernelArgsTy { + // uint32_t Version = 0; // Version of this struct for ABI compatibility. + // uint32_t NumArgs = 0; // Number of arguments in each input pointer. + // void **ArgBasePtrs = + // nullptr; // Base pointer of each argument (e.g. a struct). + // void **ArgPtrs = nullptr; // Pointer to the argument data. + // int64_t *ArgSizes = nullptr; // Size of the argument data in bytes. + // int64_t *ArgTypes = nullptr; // Type of the data (e.g. to / from). + // void **ArgNames = nullptr; // Name of the data for debugging, possibly null. + // void **ArgMappers = nullptr; // User-defined mappers, possibly null. + // uint64_t Tripcount = + // 0; // Tripcount for the teams / distribute loop, 0 otherwise. + // struct { + // uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause. + // uint64_t IsCUDA : 1; // Was this kernel spawned via CUDA. + // uint64_t Unused : 62; + // } Flags = {0, 0, 0}; + // // The number of teams (for x,y,z dimension). + // uint32_t NumTeams[3] = {0, 0, 0}; + // // The number of threads (for x,y,z dimension). + // uint32_t ThreadLimit[3] = {0, 0, 0}; + // uint32_t DynCGroupMem = 0; // Amount of dynamic cgroup memory requested. + //}; + let kernel_elements = + vec![ti32, ti32, tptr, tptr, tptr, tptr, tptr, tptr, ti64, ti64, tarr, tarr, ti32]; + + cx.set_struct_body(kernel_arguments_ty, &kernel_elements, false); + // For now we don't handle kernels, so for now we just add a global dummy + // to make sure that the __tgt_offload_entry is defined and handled correctly. + cx.declare_global("my_struct_global2", kernel_arguments_ty); +} + +fn gen_tgt_data_mappers<'ll>( + cx: &'ll SimpleCx<'_>, +) -> (&'ll llvm::Value, &'ll llvm::Value, &'ll llvm::Value, &'ll llvm::Type) { + let tptr = cx.type_ptr(); + let ti64 = cx.type_i64(); + let ti32 = cx.type_i32(); + + let args = vec![tptr, ti64, ti32, tptr, tptr, tptr, tptr, tptr, tptr]; + let mapper_fn_ty = cx.type_func(&args, cx.type_void()); + let mapper_begin = "__tgt_target_data_begin_mapper"; + let mapper_update = "__tgt_target_data_update_mapper"; + let mapper_end = "__tgt_target_data_end_mapper"; + let begin_mapper_decl = declare_offload_fn(&cx, mapper_begin, mapper_fn_ty); + let update_mapper_decl = declare_offload_fn(&cx, mapper_update, mapper_fn_ty); + let end_mapper_decl = declare_offload_fn(&cx, mapper_end, mapper_fn_ty); + + let nounwind = llvm::AttributeKind::NoUnwind.create_attr(cx.llcx); + attributes::apply_to_llfn(begin_mapper_decl, Function, &[nounwind]); + attributes::apply_to_llfn(update_mapper_decl, Function, &[nounwind]); + attributes::apply_to_llfn(end_mapper_decl, Function, &[nounwind]); + + (begin_mapper_decl, update_mapper_decl, end_mapper_decl, mapper_fn_ty) +} + +fn add_priv_unnamed_arr<'ll>(cx: &SimpleCx<'ll>, name: &str, vals: &[u64]) -> &'ll llvm::Value { + let ti64 = cx.type_i64(); + let mut size_val = Vec::with_capacity(vals.len()); + for &val in vals { + size_val.push(cx.get_const_i64(val)); + } + let initializer = cx.const_array(ti64, &size_val); + add_unnamed_global(cx, name, initializer, PrivateLinkage) +} + +pub(crate) fn add_unnamed_global<'ll>( + cx: &SimpleCx<'ll>, + name: &str, + initializer: &'ll llvm::Value, + l: Linkage, +) -> &'ll llvm::Value { + let llglobal = add_global(cx, name, initializer, l); + llvm::LLVMSetUnnamedAddress(llglobal, llvm::UnnamedAddr::Global); + llglobal +} + +pub(crate) fn add_global<'ll>( + cx: &SimpleCx<'ll>, + name: &str, + initializer: &'ll llvm::Value, + l: Linkage, +) -> &'ll llvm::Value { + let c_name = CString::new(name).unwrap(); + let llglobal: &'ll llvm::Value = llvm::add_global(cx.llmod, cx.val_ty(initializer), &c_name); + llvm::set_global_constant(llglobal, true); + llvm::set_linkage(llglobal, l); + llvm::set_initializer(llglobal, initializer); + llglobal +} + +fn gen_define_handling<'ll>( + cx: &'ll SimpleCx<'_>, + kernel: &'ll llvm::Value, + offload_entry_ty: &'ll llvm::Type, + num: i64, +) -> &'ll llvm::Value { + let types = cx.func_params_types(cx.get_type_of_global(kernel)); + // It seems like non-pointer values are automatically mapped. So here, we focus on pointer (or + // reference) types. + let num_ptr_types = types + .iter() + .map(|&x| matches!(cx.type_kind(x), rustc_codegen_ssa::common::TypeKind::Pointer)) + .count(); + + // We do not know their size anymore at this level, so hardcode a placeholder. + // A follow-up pr will track these from the frontend, where we still have Rust types. + // Then, we will be able to figure out that e.g. `&[f32;256]` will result in 4*256 bytes. + // I decided that 1024 bytes is a great placeholder value for now. + add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{num}"), &vec![1024; num_ptr_types]); + // Here we figure out whether something needs to be copied to the gpu (=1), from the gpu (=2), + // or both to and from the gpu (=3). Other values shouldn't affect us for now. + // A non-mutable reference or pointer will be 1, an array that's not read, but fully overwritten + // will be 2. For now, everything is 3, until we have our frontend set up. + let o_types = + add_priv_unnamed_arr(&cx, &format!(".offload_maptypes.{num}"), &vec![3; num_ptr_types]); + // Next: For each function, generate these three entries. A weak constant, + // the llvm.rodata entry name, and the omp_offloading_entries value + + let name = format!(".kernel_{num}.region_id"); + let initializer = cx.get_const_i8(0); + let region_id = add_unnamed_global(&cx, &name, initializer, WeakAnyLinkage); + + let c_entry_name = CString::new(format!("kernel_{num}")).unwrap(); + let c_val = c_entry_name.as_bytes_with_nul(); + let offload_entry_name = format!(".offloading.entry_name.{num}"); + + let initializer = crate::common::bytes_in_context(cx.llcx, c_val); + let llglobal = add_unnamed_global(&cx, &offload_entry_name, initializer, InternalLinkage); + llvm::set_alignment(llglobal, Align::ONE); + llvm::set_section(llglobal, c".llvm.rodata.offloading"); + + // Not actively used yet, for calling real kernels + let name = format!(".offloading.entry.kernel_{num}"); + + // See the __tgt_offload_entry documentation above. + let reserved = cx.get_const_i64(0); + let version = cx.get_const_i16(1); + let kind = cx.get_const_i16(1); + let flags = cx.get_const_i32(0); + let size = cx.get_const_i64(0); + let data = cx.get_const_i64(0); + let aux_addr = cx.const_null(cx.type_ptr()); + let elems = vec![reserved, version, kind, flags, region_id, llglobal, size, data, aux_addr]; + + let initializer = crate::common::named_struct(offload_entry_ty, &elems); + let c_name = CString::new(name).unwrap(); + let llglobal = llvm::add_global(cx.llmod, offload_entry_ty, &c_name); + llvm::set_global_constant(llglobal, true); + llvm::set_linkage(llglobal, WeakAnyLinkage); + llvm::set_initializer(llglobal, initializer); + llvm::set_alignment(llglobal, Align::ONE); + let c_section_name = CString::new(".omp_offloading_entries").unwrap(); + llvm::set_section(llglobal, &c_section_name); + o_types +} + +fn declare_offload_fn<'ll>( + cx: &'ll SimpleCx<'_>, + name: &str, + ty: &'ll llvm::Type, +) -> &'ll llvm::Value { + crate::declare::declare_simple_fn( + cx, + name, + llvm::CallConv::CCallConv, + llvm::UnnamedAddr::No, + llvm::Visibility::Default, + ty, + ) +} + +// For each kernel *call*, we now use some of our previous declared globals to move data to and from +// the gpu. We don't have a proper frontend yet, so we assume that every call to a kernel function +// from main is intended to run on the GPU. For now, we only handle the data transfer part of it. +// If two consecutive kernels use the same memory, we still move it to the host and back to the gpu. +// Since in our frontend users (by default) don't have to specify data transfer, this is something +// we should optimize in the future! We also assume that everything should be copied back and forth, +// but sometimes we can directly zero-allocate on the device and only move back, or if something is +// immutable, we might only copy it to the device, but not back. +// +// Current steps: +// 0. Alloca some variables for the following steps +// 1. set insert point before kernel call. +// 2. generate all the GEPS and stores, to be used in 3) +// 3. generate __tgt_target_data_begin calls to move data to the GPU +// +// unchanged: keep kernel call. Later move the kernel to the GPU +// +// 4. set insert point after kernel call. +// 5. generate all the GEPS and stores, to be used in 6) +// 6. generate __tgt_target_data_end calls to move data from the GPU +fn gen_call_handling<'ll>( + cx: &'ll SimpleCx<'_>, + _kernels: &[&'ll llvm::Value], + o_types: &[&'ll llvm::Value], +) { + // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } + let tptr = cx.type_ptr(); + let ti32 = cx.type_i32(); + let tgt_bin_desc_ty = vec![ti32, tptr, tptr, tptr]; + let tgt_bin_desc = cx.type_named_struct("struct.__tgt_bin_desc"); + cx.set_struct_body(tgt_bin_desc, &tgt_bin_desc_ty, false); + + gen_tgt_kernel_global(&cx); + let (begin_mapper_decl, _, end_mapper_decl, fn_ty) = gen_tgt_data_mappers(&cx); + + let main_fn = cx.get_function("main"); + let Some(main_fn) = main_fn else { return }; + let kernel_name = "kernel_1"; + let call = unsafe { + llvm::LLVMRustGetFunctionCall(main_fn, kernel_name.as_c_char_ptr(), kernel_name.len()) + }; + let Some(kernel_call) = call else { + return; + }; + let kernel_call_bb = unsafe { llvm::LLVMGetInstructionParent(kernel_call) }; + let called = unsafe { llvm::LLVMGetCalledValue(kernel_call).unwrap() }; + let mut builder = SBuilder::build(cx, kernel_call_bb); + + let types = cx.func_params_types(cx.get_type_of_global(called)); + let num_args = types.len() as u64; + + // Step 0) + // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } + // %6 = alloca %struct.__tgt_bin_desc, align 8 + unsafe { llvm::LLVMRustPositionBuilderPastAllocas(builder.llbuilder, main_fn) }; + + let tgt_bin_desc_alloca = builder.direct_alloca(tgt_bin_desc, Align::EIGHT, "EmptyDesc"); + + let ty = cx.type_array(cx.type_ptr(), num_args); + // Baseptr are just the input pointer to the kernel, stored in a local alloca + let a1 = builder.direct_alloca(ty, Align::EIGHT, ".offload_baseptrs"); + // Ptrs are the result of a gep into the baseptr, at least for our trivial types. + let a2 = builder.direct_alloca(ty, Align::EIGHT, ".offload_ptrs"); + // These represent the sizes in bytes, e.g. the entry for `&[f64; 16]` will be 8*16. + let ty2 = cx.type_array(cx.type_i64(), num_args); + let a4 = builder.direct_alloca(ty2, Align::EIGHT, ".offload_sizes"); + // Now we allocate once per function param, a copy to be passed to one of our maps. + let mut vals = vec![]; + let mut geps = vec![]; + let i32_0 = cx.get_const_i32(0); + for (index, in_ty) in types.iter().enumerate() { + // get function arg, store it into the alloca, and read it. + let p = llvm::get_param(called, index as u32); + let name = llvm::get_value_name(p); + let name = str::from_utf8(&name).unwrap(); + let arg_name = format!("{name}.addr"); + let alloca = builder.direct_alloca(in_ty, Align::EIGHT, &arg_name); + + builder.store(p, alloca, Align::EIGHT); + let val = builder.load(in_ty, alloca, Align::EIGHT); + let gep = builder.inbounds_gep(cx.type_f32(), val, &[i32_0]); + vals.push(val); + geps.push(gep); + } + + // Step 1) + unsafe { llvm::LLVMRustPositionBefore(builder.llbuilder, kernel_call) }; + builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT); + + let mapper_fn_ty = cx.type_func(&[cx.type_ptr()], cx.type_void()); + let register_lib_decl = declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty); + let unregister_lib_decl = declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty); + let init_ty = cx.type_func(&[], cx.type_void()); + let init_rtls_decl = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty); + + // call void @__tgt_register_lib(ptr noundef %6) + builder.call(mapper_fn_ty, register_lib_decl, &[tgt_bin_desc_alloca], None); + // call void @__tgt_init_all_rtls() + builder.call(init_ty, init_rtls_decl, &[], None); + + for i in 0..num_args { + let idx = cx.get_const_i32(i); + let gep1 = builder.inbounds_gep(ty, a1, &[i32_0, idx]); + builder.store(vals[i as usize], gep1, Align::EIGHT); + let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, idx]); + builder.store(geps[i as usize], gep2, Align::EIGHT); + let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, idx]); + // As mentioned above, we don't use Rust type information yet. So for now we will just + // assume that we have 1024 bytes, 256 f32 values. + // FIXME(offload): write an offload frontend and handle arbitrary types. + builder.store(cx.get_const_i64(1024), gep3, Align::EIGHT); + } + + // For now we have a very simplistic indexing scheme into our + // offload_{baseptrs,ptrs,sizes}. We will probably improve this along with our gpu frontend pr. + fn get_geps<'a, 'll>( + builder: &mut SBuilder<'a, 'll>, + cx: &'ll SimpleCx<'ll>, + ty: &'ll Type, + ty2: &'ll Type, + a1: &'ll Value, + a2: &'ll Value, + a4: &'ll Value, + ) -> (&'ll Value, &'ll Value, &'ll Value) { + let i32_0 = cx.get_const_i32(0); + + let gep1 = builder.inbounds_gep(ty, a1, &[i32_0, i32_0]); + let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, i32_0]); + let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, i32_0]); + (gep1, gep2, gep3) + } + + fn generate_mapper_call<'a, 'll>( + builder: &mut SBuilder<'a, 'll>, + cx: &'ll SimpleCx<'ll>, + geps: (&'ll Value, &'ll Value, &'ll Value), + o_type: &'ll Value, + fn_to_call: &'ll Value, + fn_ty: &'ll Type, + num_args: u64, + s_ident_t: &'ll Value, + ) { + let nullptr = cx.const_null(cx.type_ptr()); + let i64_max = cx.get_const_i64(u64::MAX); + let num_args = cx.get_const_i32(num_args); + let args = + vec![s_ident_t, i64_max, num_args, geps.0, geps.1, geps.2, o_type, nullptr, nullptr]; + builder.call(fn_ty, fn_to_call, &args, None); + } + + // Step 2) + let s_ident_t = generate_at_one(&cx); + let o = o_types[0]; + let geps = get_geps(&mut builder, &cx, ty, ty2, a1, a2, a4); + generate_mapper_call(&mut builder, &cx, geps, o, begin_mapper_decl, fn_ty, num_args, s_ident_t); + + // Step 3) + // Here we will add code for the actual kernel launches in a follow-up PR. + // FIXME(offload): launch kernels + + // Step 4) + unsafe { llvm::LLVMRustPositionAfter(builder.llbuilder, kernel_call) }; + + let geps = get_geps(&mut builder, &cx, ty, ty2, a1, a2, a4); + generate_mapper_call(&mut builder, &cx, geps, o, end_mapper_decl, fn_ty, num_args, s_ident_t); + + builder.call(mapper_fn_ty, unregister_lib_decl, &[tgt_bin_desc_alloca], None); + + // With this we generated the following begin and end mappers. We could easily generate the + // update mapper in an update. + // call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 3, ptr %27, ptr %28, ptr %29, ptr @.offload_maptypes, ptr null, ptr null) + // call void @__tgt_target_data_update_mapper(ptr @1, i64 -1, i32 2, ptr %46, ptr %47, ptr %48, ptr @.offload_maptypes.1, ptr null, ptr null) + // call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 3, ptr %49, ptr %50, ptr %51, ptr @.offload_maptypes, ptr null, ptr null) +} diff --git a/compiler/rustc_codegen_llvm/src/callee.rs b/compiler/rustc_codegen_llvm/src/callee.rs index 6d68eca60af..791a71d73ae 100644 --- a/compiler/rustc_codegen_llvm/src/callee.rs +++ b/compiler/rustc_codegen_llvm/src/callee.rs @@ -102,8 +102,8 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t let is_hidden = if is_generic { // This is a monomorphization of a generic function. if !(cx.tcx.sess.opts.share_generics() - || tcx.codegen_fn_attrs(instance_def_id).inline - == rustc_attr_data_structures::InlineAttr::Never) + || tcx.codegen_instance_attrs(instance.def).inline + == rustc_hir::attrs::InlineAttr::Never) { // When not sharing generics, all instances are in the same // crate and have hidden visibility. diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs index ae5add59322..f29fefb66f0 100644 --- a/compiler/rustc_codegen_llvm/src/common.rs +++ b/compiler/rustc_codegen_llvm/src/common.rs @@ -3,9 +3,8 @@ use std::borrow::Borrow; use libc::{c_char, c_uint}; -use rustc_abi as abi; -use rustc_abi::HasDataLayout; use rustc_abi::Primitive::Pointer; +use rustc_abi::{self as abi, HasDataLayout as _}; use rustc_ast::Mutability; use rustc_codegen_ssa::common::TypeKind; use rustc_codegen_ssa::traits::*; @@ -119,6 +118,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> { r } } + + pub(crate) fn const_null(&self, t: &'ll Type) -> &'ll Value { + unsafe { llvm::LLVMConstNull(t) } + } } impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> { @@ -175,7 +178,7 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> { } fn const_usize(&self, i: u64) -> &'ll Value { - let bit_size = self.data_layout().pointer_size.bits(); + let bit_size = self.data_layout().pointer_size().bits(); if bit_size < 64 { // make sure it doesn't overflow assert!(i < (1 << bit_size)); @@ -216,10 +219,10 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> { bug!("symbol `{}` is already defined", sym); }); llvm::set_initializer(g, sc); - unsafe { - llvm::LLVMSetGlobalConstant(g, True); - llvm::LLVMSetUnnamedAddress(g, llvm::UnnamedAddr::Global); - } + + llvm::set_global_constant(g, true); + llvm::set_unnamed_address(g, llvm::UnnamedAddr::Global); + llvm::set_linkage(g, llvm::Linkage::InternalLinkage); // Cast to default address space if globals are in a different addrspace let g = self.const_pointercast(g, self.type_ptr()); @@ -268,7 +271,7 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> { } } Scalar::Ptr(ptr, _size) => { - let (prov, offset) = ptr.into_parts(); + let (prov, offset) = ptr.prov_and_relative_offset(); let global_alloc = self.tcx.global_alloc(prov.alloc_id()); let base_addr = match global_alloc { GlobalAlloc::Memory(alloc) => { @@ -284,7 +287,8 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> { self.const_bitcast(llval, llty) }; } else { - let init = const_alloc_to_llvm(self, alloc, /*static*/ false); + let init = + const_alloc_to_llvm(self, alloc.inner(), /*static*/ false); let alloc = alloc.inner(); let value = match alloc.mutability { Mutability::Mut => self.static_addr_of_mut(init, alloc.align, None), @@ -316,15 +320,19 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> { }), ))) .unwrap_memory(); - let init = const_alloc_to_llvm(self, alloc, /*static*/ false); - let value = self.static_addr_of_impl(init, alloc.inner().align, None); - value + let init = const_alloc_to_llvm(self, alloc.inner(), /*static*/ false); + self.static_addr_of_impl(init, alloc.inner().align, None) } GlobalAlloc::Static(def_id) => { assert!(self.tcx.is_static(def_id)); assert!(!self.tcx.is_thread_local_static(def_id)); self.get_static(def_id) } + GlobalAlloc::TypeId { .. } => { + // Drop the provenance, the offset contains the bytes of the hash + let llval = self.const_usize(offset.bytes()); + return unsafe { llvm::LLVMConstIntToPtr(llval, llty) }; + } }; let base_addr_space = global_alloc.address_space(self); let llval = unsafe { @@ -346,7 +354,7 @@ impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> { } fn const_data_from_alloc(&self, alloc: ConstAllocation<'_>) -> Self::Value { - const_alloc_to_llvm(self, alloc, /*static*/ false) + const_alloc_to_llvm(self, alloc.inner(), /*static*/ false) } fn const_ptr_byte_offset(&self, base_addr: Self::Value, offset: abi::Size) -> Self::Value { @@ -373,6 +381,11 @@ pub(crate) fn bytes_in_context<'ll>(llcx: &'ll llvm::Context, bytes: &[u8]) -> & } } +pub(crate) fn named_struct<'ll>(ty: &'ll Type, elts: &[&'ll Value]) -> &'ll Value { + let len = c_uint::try_from(elts.len()).expect("LLVMConstStructInContext elements len overflow"); + unsafe { llvm::LLVMConstNamedStruct(ty, elts.as_ptr(), len) } +} + fn struct_in_context<'ll>( llcx: &'ll llvm::Context, elts: &[&'ll Value], diff --git a/compiler/rustc_codegen_llvm/src/consts.rs b/compiler/rustc_codegen_llvm/src/consts.rs index a4492d76c3c..6b06daf3477 100644 --- a/compiler/rustc_codegen_llvm/src/consts.rs +++ b/compiler/rustc_codegen_llvm/src/consts.rs @@ -5,7 +5,7 @@ use rustc_codegen_ssa::common; use rustc_codegen_ssa::traits::*; use rustc_hir::LangItem; use rustc_hir::def::DefKind; -use rustc_hir::def_id::DefId; +use rustc_hir::def_id::{DefId, LOCAL_CRATE}; use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs}; use rustc_middle::mir::interpret::{ Allocation, ConstAllocation, ErrorHandled, InitChunk, Pointer, Scalar as InterpScalar, @@ -17,20 +17,18 @@ use rustc_middle::ty::{self, Instance}; use rustc_middle::{bug, span_bug}; use tracing::{debug, instrument, trace}; -use crate::common::{AsCCharPtr, CodegenCx}; +use crate::common::CodegenCx; use crate::errors::SymbolAlreadyDefined; -use crate::llvm::{self, True}; use crate::type_::Type; use crate::type_of::LayoutLlvmExt; use crate::value::Value; -use crate::{base, debuginfo}; +use crate::{base, debuginfo, llvm}; pub(crate) fn const_alloc_to_llvm<'ll>( cx: &CodegenCx<'ll, '_>, - alloc: ConstAllocation<'_>, + alloc: &Allocation, is_static: bool, ) -> &'ll Value { - let alloc = alloc.inner(); // We expect that callers of const_alloc_to_llvm will instead directly codegen a pointer or // integer for any &ZST where the ZST is a constant (i.e. not a static). We should never be // producing empty LLVM allocations as they're just adding noise to binaries and forcing less @@ -43,7 +41,8 @@ pub(crate) fn const_alloc_to_llvm<'ll>( } let mut llvals = Vec::with_capacity(alloc.provenance().ptrs().len() + 1); let dl = cx.data_layout(); - let pointer_size = dl.pointer_size.bytes() as usize; + let pointer_size = dl.pointer_size(); + let pointer_size_bytes = pointer_size.bytes() as usize; // Note: this function may call `inspect_with_uninit_and_ptr_outside_interpreter`, so `range` // must be within the bounds of `alloc` and not contain or overlap a pointer provenance. @@ -100,7 +99,9 @@ pub(crate) fn const_alloc_to_llvm<'ll>( // This `inspect` is okay since it is within the bounds of the allocation, it doesn't // affect interpreter execution (we inspect the result after interpreter execution), // and we properly interpret the provenance as a relocation pointer offset. - alloc.inspect_with_uninit_and_ptr_outside_interpreter(offset..(offset + pointer_size)), + alloc.inspect_with_uninit_and_ptr_outside_interpreter( + offset..(offset + pointer_size_bytes), + ), ) .expect("const_alloc_to_llvm: could not read relocation pointer") as u64; @@ -111,11 +112,11 @@ pub(crate) fn const_alloc_to_llvm<'ll>( InterpScalar::from_pointer(Pointer::new(prov, Size::from_bytes(ptr_offset)), &cx.tcx), Scalar::Initialized { value: Primitive::Pointer(address_space), - valid_range: WrappingRange::full(dl.pointer_size), + valid_range: WrappingRange::full(pointer_size), }, cx.type_ptr_ext(address_space), )); - next_offset = offset + pointer_size; + next_offset = offset + pointer_size_bytes; } if alloc.len() >= next_offset { let range = next_offset..alloc.len(); @@ -138,7 +139,7 @@ fn codegen_static_initializer<'ll, 'tcx>( def_id: DefId, ) -> Result<(&'ll Value, ConstAllocation<'tcx>), ErrorHandled> { let alloc = cx.tcx.eval_static_initializer(def_id)?; - Ok((const_alloc_to_llvm(cx, alloc, /*static*/ true), alloc)) + Ok((const_alloc_to_llvm(cx, alloc.inner(), /*static*/ true), alloc)) } fn set_global_alignment<'ll>(cx: &CodegenCx<'ll, '_>, gv: &'ll Value, mut align: Align) { @@ -190,8 +191,8 @@ fn check_and_apply_linkage<'ll, 'tcx>( // linkage and there are no definitions), then // `extern_with_linkage_foo` will instead be initialized to // zero. - let mut real_name = "_rust_extern_with_linkage_".to_string(); - real_name.push_str(sym); + let real_name = + format!("_rust_extern_with_linkage_{:016x}_{sym}", cx.tcx.stable_crate_id(LOCAL_CRATE)); let g2 = cx.define_global(&real_name, llty).unwrap_or_else(|| { cx.sess().dcx().emit_fatal(SymbolAlreadyDefined { span: cx.tcx.def_span(def_id), @@ -245,7 +246,7 @@ impl<'ll> CodegenCx<'ll, '_> { }; llvm::set_initializer(gv, cv); set_global_alignment(self, gv, align); - llvm::SetUnnamedAddress(gv, llvm::UnnamedAddr::Global); + llvm::set_unnamed_address(gv, llvm::UnnamedAddr::Global); gv } @@ -270,9 +271,8 @@ impl<'ll> CodegenCx<'ll, '_> { return gv; } let gv = self.static_addr_of_mut(cv, align, kind); - unsafe { - llvm::LLVMSetGlobalConstant(gv, True); - } + llvm::set_global_constant(gv, true); + self.const_globals.borrow_mut().insert(cv, gv); gv } @@ -396,149 +396,140 @@ impl<'ll> CodegenCx<'ll, '_> { } fn codegen_static_item(&mut self, def_id: DefId) { - unsafe { - assert!( - llvm::LLVMGetInitializer( - self.instances.borrow().get(&Instance::mono(self.tcx, def_id)).unwrap() - ) - .is_none() - ); - let attrs = self.tcx.codegen_fn_attrs(def_id); + assert!( + llvm::LLVMGetInitializer( + self.instances.borrow().get(&Instance::mono(self.tcx, def_id)).unwrap() + ) + .is_none() + ); + let attrs = self.tcx.codegen_fn_attrs(def_id); - let Ok((v, alloc)) = codegen_static_initializer(self, def_id) else { - // Error has already been reported - return; - }; - let alloc = alloc.inner(); + let Ok((v, alloc)) = codegen_static_initializer(self, def_id) else { + // Error has already been reported + return; + }; + let alloc = alloc.inner(); - let val_llty = self.val_ty(v); + let val_llty = self.val_ty(v); - let g = self.get_static_inner(def_id, val_llty); - let llty = self.get_type_of_global(g); + let g = self.get_static_inner(def_id, val_llty); + let llty = self.get_type_of_global(g); - let g = if val_llty == llty { - g - } else { - // codegen_static_initializer creates the global value just from the - // `Allocation` data by generating one big struct value that is just - // all the bytes and pointers after each other. This will almost never - // match the type that the static was declared with. Unfortunately - // we can't just LLVMConstBitCast our way out of it because that has very - // specific rules on what can be cast. So instead of adding a new way to - // generate static initializers that match the static's type, we picked - // the easier option and retroactively change the type of the static item itself. - let name = llvm::get_value_name(g).to_vec(); - llvm::set_value_name(g, b""); - - let linkage = llvm::get_linkage(g); - let visibility = llvm::get_visibility(g); - - let new_g = llvm::LLVMRustGetOrInsertGlobal( - self.llmod, - name.as_c_char_ptr(), - name.len(), - val_llty, - ); - - llvm::set_linkage(new_g, linkage); - llvm::set_visibility(new_g, visibility); - - // The old global has had its name removed but is returned by - // get_static since it is in the instance cache. Provide an - // alternative lookup that points to the new global so that - // global_asm! can compute the correct mangled symbol name - // for the global. - self.renamed_statics.borrow_mut().insert(def_id, new_g); - - // To avoid breaking any invariants, we leave around the old - // global for the moment; we'll replace all references to it - // with the new global later. (See base::codegen_backend.) - self.statics_to_rauw.borrow_mut().push((g, new_g)); - new_g - }; - set_global_alignment(self, g, alloc.align); - llvm::set_initializer(g, v); - - self.assume_dso_local(g, true); - - // Forward the allocation's mutability (picked by the const interner) to LLVM. - if alloc.mutability.is_not() { - llvm::LLVMSetGlobalConstant(g, llvm::True); - } + let g = if val_llty == llty { + g + } else { + // codegen_static_initializer creates the global value just from the + // `Allocation` data by generating one big struct value that is just + // all the bytes and pointers after each other. This will almost never + // match the type that the static was declared with. Unfortunately + // we can't just LLVMConstBitCast our way out of it because that has very + // specific rules on what can be cast. So instead of adding a new way to + // generate static initializers that match the static's type, we picked + // the easier option and retroactively change the type of the static item itself. + let name = String::from_utf8(llvm::get_value_name(g)) + .expect("we declare our statics with a utf8-valid name"); + llvm::set_value_name(g, b""); + + let linkage = llvm::get_linkage(g); + let visibility = llvm::get_visibility(g); + + let new_g = self.declare_global(&name, val_llty); + + llvm::set_linkage(new_g, linkage); + llvm::set_visibility(new_g, visibility); + + // The old global has had its name removed but is returned by + // get_static since it is in the instance cache. Provide an + // alternative lookup that points to the new global so that + // global_asm! can compute the correct mangled symbol name + // for the global. + self.renamed_statics.borrow_mut().insert(def_id, new_g); + + // To avoid breaking any invariants, we leave around the old + // global for the moment; we'll replace all references to it + // with the new global later. (See base::codegen_backend.) + self.statics_to_rauw.borrow_mut().push((g, new_g)); + new_g + }; + set_global_alignment(self, g, alloc.align); + llvm::set_initializer(g, v); - debuginfo::build_global_var_di_node(self, def_id, g); + self.assume_dso_local(g, true); - if attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) { - llvm::set_thread_local_mode(g, self.tls_model); - } + // Forward the allocation's mutability (picked by the const interner) to LLVM. + if alloc.mutability.is_not() { + llvm::set_global_constant(g, true); + } - // Wasm statics with custom link sections get special treatment as they - // go into custom sections of the wasm executable. The exception to this - // is the `.init_array` section which are treated specially by the wasm linker. - if self.tcx.sess.target.is_like_wasm - && attrs - .link_section - .map(|link_section| !link_section.as_str().starts_with(".init_array")) - .unwrap_or(true) - { - if let Some(section) = attrs.link_section { - let section = llvm::LLVMMDStringInContext2( - self.llcx, - section.as_str().as_c_char_ptr(), - section.as_str().len(), - ); - assert!(alloc.provenance().ptrs().is_empty()); - - // The `inspect` method is okay here because we checked for provenance, and - // because we are doing this access to inspect the final interpreter state (not - // as part of the interpreter execution). - let bytes = - alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()); - let alloc = - llvm::LLVMMDStringInContext2(self.llcx, bytes.as_c_char_ptr(), bytes.len()); - let data = [section, alloc]; - let meta = llvm::LLVMMDNodeInContext2(self.llcx, data.as_ptr(), data.len()); - let val = self.get_metadata_value(meta); + debuginfo::build_global_var_di_node(self, def_id, g); + + if attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) { + llvm::set_thread_local_mode(g, self.tls_model); + } + + // Wasm statics with custom link sections get special treatment as they + // go into custom sections of the wasm executable. The exception to this + // is the `.init_array` section which are treated specially by the wasm linker. + if self.tcx.sess.target.is_like_wasm + && attrs + .link_section + .map(|link_section| !link_section.as_str().starts_with(".init_array")) + .unwrap_or(true) + { + if let Some(section) = attrs.link_section { + let section = self.create_metadata(section.as_str().as_bytes()); + assert!(alloc.provenance().ptrs().is_empty()); + + // The `inspect` method is okay here because we checked for provenance, and + // because we are doing this access to inspect the final interpreter state (not + // as part of the interpreter execution). + let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()); + let alloc = self.create_metadata(bytes); + let data = [section, alloc]; + let meta = + unsafe { llvm::LLVMMDNodeInContext2(self.llcx, data.as_ptr(), data.len()) }; + let val = self.get_metadata_value(meta); + unsafe { llvm::LLVMAddNamedMetadataOperand( self.llmod, c"wasm.custom_sections".as_ptr(), val, - ); - } - } else { - base::set_link_section(g, attrs); + ) + }; } + } else { + base::set_link_section(g, attrs); + } - base::set_variable_sanitizer_attrs(g, attrs); - - if attrs.flags.contains(CodegenFnAttrFlags::USED_COMPILER) { - // `USED` and `USED_LINKER` can't be used together. - assert!(!attrs.flags.contains(CodegenFnAttrFlags::USED_LINKER)); - - // The semantics of #[used] in Rust only require the symbol to make it into the - // object file. It is explicitly allowed for the linker to strip the symbol if it - // is dead, which means we are allowed to use `llvm.compiler.used` instead of - // `llvm.used` here. - // - // Additionally, https://reviews.llvm.org/D97448 in LLVM 13 started emitting unique - // sections with SHF_GNU_RETAIN flag for llvm.used symbols, which may trigger bugs - // in the handling of `.init_array` (the static constructor list) in versions of - // the gold linker (prior to the one released with binutils 2.36). - // - // That said, we only ever emit these when `#[used(compiler)]` is explicitly - // requested. This is to avoid similar breakage on other targets, in particular - // MachO targets have *their* static constructor lists broken if `llvm.compiler.used` - // is emitted rather than `llvm.used`. However, that check happens when assigning - // the `CodegenFnAttrFlags` in the `codegen_fn_attrs` query, so we don't need to - // take care of it here. - self.add_compiler_used_global(g); - } - if attrs.flags.contains(CodegenFnAttrFlags::USED_LINKER) { - // `USED` and `USED_LINKER` can't be used together. - assert!(!attrs.flags.contains(CodegenFnAttrFlags::USED_COMPILER)); + base::set_variable_sanitizer_attrs(g, attrs); + + if attrs.flags.contains(CodegenFnAttrFlags::USED_COMPILER) { + // `USED` and `USED_LINKER` can't be used together. + assert!(!attrs.flags.contains(CodegenFnAttrFlags::USED_LINKER)); + + // The semantics of #[used] in Rust only require the symbol to make it into the + // object file. It is explicitly allowed for the linker to strip the symbol if it + // is dead, which means we are allowed to use `llvm.compiler.used` instead of + // `llvm.used` here. + // + // Additionally, https://reviews.llvm.org/D97448 in LLVM 13 started emitting unique + // sections with SHF_GNU_RETAIN flag for llvm.used symbols, which may trigger bugs + // in the handling of `.init_array` (the static constructor list) in versions of + // the gold linker (prior to the one released with binutils 2.36). + // + // That said, we only ever emit these when `#[used(compiler)]` is explicitly + // requested. This is to avoid similar breakage on other targets, in particular + // MachO targets have *their* static constructor lists broken if `llvm.compiler.used` + // is emitted rather than `llvm.used`. However, that check happens when assigning + // the `CodegenFnAttrFlags` in the `codegen_fn_attrs` query, so we don't need to + // take care of it here. + self.add_compiler_used_global(g); + } + if attrs.flags.contains(CodegenFnAttrFlags::USED_LINKER) { + // `USED` and `USED_LINKER` can't be used together. + assert!(!attrs.flags.contains(CodegenFnAttrFlags::USED_COMPILER)); - self.add_used_global(g); - } + self.add_used_global(g); } } diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 0324dff6ff2..27ae729a531 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -34,7 +34,6 @@ use smallvec::SmallVec; use crate::back::write::to_llvm_code_model; use crate::callee::get_fn; -use crate::common::AsCCharPtr; use crate::debuginfo::metadata::apply_vcall_visibility_metadata; use crate::llvm::Metadata; use crate::type_::Type; @@ -169,6 +168,8 @@ pub(crate) unsafe fn create_module<'ll>( let mod_name = SmallCStr::new(mod_name); let llmod = unsafe { llvm::LLVMModuleCreateWithNameInContext(mod_name.as_ptr(), llcx) }; + let cx = SimpleCx::new(llmod, llcx, tcx.data_layout.pointer_size()); + let mut target_data_layout = sess.target.data_layout.to_string(); let llvm_version = llvm_util::get_version(); @@ -206,11 +207,22 @@ pub(crate) unsafe fn create_module<'ll>( // LLVM 21 updated the default layout on nvptx: https://github.com/llvm/llvm-project/pull/124961 target_data_layout = target_data_layout.replace("e-p6:32:32-i64", "e-i64"); } + if sess.target.arch == "amdgpu" { + // LLVM 21 adds the address width for address space 8. + // See https://github.com/llvm/llvm-project/pull/139419 + target_data_layout = target_data_layout.replace("p8:128:128:128:48", "p8:128:128") + } + } + if llvm_version < (22, 0, 0) { + if sess.target.arch == "avr" { + // LLVM 22.0 updated the default layout on avr: https://github.com/llvm/llvm-project/pull/153010 + target_data_layout = target_data_layout.replace("n8:16", "n8") + } } // Ensure the data-layout values hardcoded remain the defaults. { - let tm = crate::back::write::create_informational_target_machine(tcx.sess, false); + let tm = crate::back::write::create_informational_target_machine(sess, false); unsafe { llvm::LLVMRustSetDataLayoutFromTargetMachine(llmod, tm.raw()); } @@ -473,18 +485,14 @@ pub(crate) unsafe fn create_module<'ll>( #[allow(clippy::option_env_unwrap)] let rustc_producer = format!("rustc version {}", option_env!("CFG_VERSION").expect("CFG_VERSION")); - let name_metadata = unsafe { - llvm::LLVMMDStringInContext2( - llcx, - rustc_producer.as_c_char_ptr(), - rustc_producer.as_bytes().len(), - ) - }; + + let name_metadata = cx.create_metadata(rustc_producer.as_bytes()); + unsafe { llvm::LLVMAddNamedMetadataOperand( llmod, c"llvm.ident".as_ptr(), - &llvm::LLVMMetadataAsValue(llcx, llvm::LLVMMDNodeInContext2(llcx, &name_metadata, 1)), + &cx.get_metadata_value(llvm::LLVMMDNodeInContext2(llcx, &name_metadata, 1)), ); } @@ -605,7 +613,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { GenericCx( FullCx { tcx, - scx: SimpleCx::new(llmod, llcx, tcx.data_layout.pointer_size), + scx: SimpleCx::new(llmod, llcx, tcx.data_layout.pointer_size()), use_dll_storage_attrs, tls_model, codegen_unit, @@ -683,6 +691,22 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> { unsafe { llvm::LLVMConstInt(ty, val, llvm::False) } } + pub(crate) fn get_const_i64(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i64(), n) + } + + pub(crate) fn get_const_i32(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i32(), n) + } + + pub(crate) fn get_const_i16(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i16(), n) + } + + pub(crate) fn get_const_i8(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i8(), n) + } + pub(crate) fn get_function(&self, name: &str) -> Option<&'ll Value> { let name = SmallCStr::new(name); unsafe { llvm::LLVMGetNamedFunction((**self).borrow().llmod, name.as_ptr()) } @@ -698,10 +722,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> { } } - pub(crate) fn create_metadata(&self, name: String) -> Option<&'ll Metadata> { - Some(unsafe { + pub(crate) fn create_metadata(&self, name: &[u8]) -> &'ll Metadata { + unsafe { llvm::LLVMMDStringInContext2(self.llcx(), name.as_ptr() as *const c_char, name.len()) - }) + } } pub(crate) fn get_functions(&self) -> Vec<&'ll Value> { diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs index f6000e72840..a4b60d420f3 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs @@ -73,48 +73,6 @@ pub(crate) struct CounterExpression { pub(crate) rhs: Counter, } -pub(crate) mod mcdc { - use rustc_middle::mir::coverage::{ConditionId, ConditionInfo, DecisionInfo}; - - /// Must match the layout of `LLVMRustMCDCDecisionParameters`. - #[repr(C)] - #[derive(Clone, Copy, Debug, Default)] - pub(crate) struct DecisionParameters { - bitmap_idx: u32, - num_conditions: u16, - } - - type LLVMConditionId = i16; - - /// Must match the layout of `LLVMRustMCDCBranchParameters`. - #[repr(C)] - #[derive(Clone, Copy, Debug, Default)] - pub(crate) struct BranchParameters { - condition_id: LLVMConditionId, - condition_ids: [LLVMConditionId; 2], - } - - impl From<ConditionInfo> for BranchParameters { - fn from(value: ConditionInfo) -> Self { - let to_llvm_cond_id = |cond_id: Option<ConditionId>| { - cond_id.and_then(|id| LLVMConditionId::try_from(id.as_usize()).ok()).unwrap_or(-1) - }; - let ConditionInfo { condition_id, true_next_id, false_next_id } = value; - Self { - condition_id: to_llvm_cond_id(Some(condition_id)), - condition_ids: [to_llvm_cond_id(false_next_id), to_llvm_cond_id(true_next_id)], - } - } - } - - impl From<DecisionInfo> for DecisionParameters { - fn from(info: DecisionInfo) -> Self { - let DecisionInfo { bitmap_idx, num_conditions } = info; - Self { bitmap_idx, num_conditions } - } - } -} - /// A span of source code coordinates to be embedded in coverage metadata. /// /// Must match the layout of `LLVMRustCoverageSpan`. @@ -148,26 +106,14 @@ pub(crate) struct Regions { pub(crate) code_regions: Vec<CodeRegion>, pub(crate) expansion_regions: Vec<ExpansionRegion>, pub(crate) branch_regions: Vec<BranchRegion>, - pub(crate) mcdc_branch_regions: Vec<MCDCBranchRegion>, - pub(crate) mcdc_decision_regions: Vec<MCDCDecisionRegion>, } impl Regions { /// Returns true if none of this structure's tables contain any regions. pub(crate) fn has_no_regions(&self) -> bool { - let Self { - code_regions, - expansion_regions, - branch_regions, - mcdc_branch_regions, - mcdc_decision_regions, - } = self; - - code_regions.is_empty() - && expansion_regions.is_empty() - && branch_regions.is_empty() - && mcdc_branch_regions.is_empty() - && mcdc_decision_regions.is_empty() + let Self { code_regions, expansion_regions, branch_regions } = self; + + code_regions.is_empty() && expansion_regions.is_empty() && branch_regions.is_empty() } } @@ -195,21 +141,3 @@ pub(crate) struct BranchRegion { pub(crate) true_counter: Counter, pub(crate) false_counter: Counter, } - -/// Must match the layout of `LLVMRustCoverageMCDCBranchRegion`. -#[derive(Clone, Debug)] -#[repr(C)] -pub(crate) struct MCDCBranchRegion { - pub(crate) cov_span: CoverageSpan, - pub(crate) true_counter: Counter, - pub(crate) false_counter: Counter, - pub(crate) mcdc_branch_params: mcdc::BranchParameters, -} - -/// Must match the layout of `LLVMRustCoverageMCDCDecisionRegion`. -#[derive(Clone, Debug)] -#[repr(C)] -pub(crate) struct MCDCDecisionRegion { - pub(crate) cov_span: CoverageSpan, - pub(crate) mcdc_decision_params: mcdc::DecisionParameters, -} diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs index 907d6d41a1f..bc4f6bb6a82 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs @@ -63,13 +63,7 @@ pub(crate) fn write_function_mappings_to_buffer( expressions: &[ffi::CounterExpression], regions: &ffi::Regions, ) -> Vec<u8> { - let ffi::Regions { - code_regions, - expansion_regions, - branch_regions, - mcdc_branch_regions, - mcdc_decision_regions, - } = regions; + let ffi::Regions { code_regions, expansion_regions, branch_regions } = regions; // SAFETY: // - All types are FFI-compatible and have matching representations in Rust/C++. @@ -87,10 +81,6 @@ pub(crate) fn write_function_mappings_to_buffer( expansion_regions.len(), branch_regions.as_ptr(), branch_regions.len(), - mcdc_branch_regions.as_ptr(), - mcdc_branch_regions.len(), - mcdc_decision_regions.as_ptr(), - mcdc_decision_regions.len(), buffer, ) }) diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index a9be833a643..d1cb95507d9 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -1,3 +1,4 @@ +use std::assert_matches::assert_matches; use std::sync::Arc; use itertools::Itertools; @@ -5,6 +6,7 @@ use rustc_abi::Align; use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, ConstCodegenMethods}; use rustc_data_structures::fx::FxIndexMap; use rustc_index::IndexVec; +use rustc_macros::TryFromU32; use rustc_middle::ty::TyCtxt; use rustc_session::RemapFileNameExt; use rustc_session::config::RemapPathScopeComponents; @@ -20,6 +22,23 @@ mod covfun; mod spans; mod unused; +/// Version number that will be included the `__llvm_covmap` section header. +/// Corresponds to LLVM's `llvm::coverage::CovMapVersion` (in `CoverageMapping.h`), +/// or at least the subset that we know and care about. +/// +/// Note that version `n` is encoded as `(n-1)`. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, TryFromU32)] +enum CovmapVersion { + /// Used by LLVM 18 onwards. + Version7 = 6, +} + +impl CovmapVersion { + fn to_u32(self) -> u32 { + self as u32 + } +} + /// Generates and exports the coverage map, which is embedded in special /// linker sections in the final binary. /// @@ -29,38 +48,28 @@ pub(crate) fn finalize(cx: &mut CodegenCx<'_, '_>) { let tcx = cx.tcx; // Ensure that LLVM is using a version of the coverage mapping format that - // agrees with our Rust-side code. Expected versions (encoded as n-1) are: - // - `CovMapVersion::Version7` (6) used by LLVM 18-19 - let covmap_version = { - let llvm_covmap_version = llvm_cov::mapping_version(); - let expected_versions = 6..=6; - assert!( - expected_versions.contains(&llvm_covmap_version), - "Coverage mapping version exposed by `llvm-wrapper` is out of sync; \ - expected {expected_versions:?} but was {llvm_covmap_version}" - ); - // This is the version number that we will embed in the covmap section: - llvm_covmap_version - }; + // agrees with our Rust-side code. Expected versions are: + // - `Version7` (6) used by LLVM 18 onwards. + let covmap_version = + CovmapVersion::try_from(llvm_cov::mapping_version()).unwrap_or_else(|raw_version: u32| { + panic!("unknown coverage mapping version reported by `llvm-wrapper`: {raw_version}") + }); + assert_matches!(covmap_version, CovmapVersion::Version7); debug!("Generating coverage map for CodegenUnit: `{}`", cx.codegen_unit.name()); // FIXME(#132395): Can this be none even when coverage is enabled? - let instances_used = match cx.coverage_cx { - Some(ref cx) => cx.instances_used.borrow(), - None => return, - }; - - let mut covfun_records = instances_used - .iter() - .copied() + let Some(ref coverage_cx) = cx.coverage_cx else { return }; + + let mut covfun_records = coverage_cx + .instances_used() + .into_iter() // Sort by symbol name, so that the global file table is built in an // order that doesn't depend on the stable-hash-based order in which // instances were visited during codegen. .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name) .filter_map(|instance| prepare_covfun_record(tcx, instance, true)) .collect::<Vec<_>>(); - drop(instances_used); // In a single designated CGU, also prepare covfun records for functions // in this crate that were instrumented for coverage, but are unused. @@ -205,7 +214,11 @@ impl VirtualFileMapping { /// Generates the contents of the covmap record for this CGU, which mostly /// consists of a header and a list of filenames. The record is then stored /// as a global variable in the `__llvm_covmap` section. -fn generate_covmap_record<'ll>(cx: &mut CodegenCx<'ll, '_>, version: u32, filenames_buffer: &[u8]) { +fn generate_covmap_record<'ll>( + cx: &mut CodegenCx<'ll, '_>, + version: CovmapVersion, + filenames_buffer: &[u8], +) { // A covmap record consists of four target-endian u32 values, followed by // the encoded filenames table. Two of the header fields are unused in // modern versions of the LLVM coverage mapping format, and are always 0. @@ -216,7 +229,7 @@ fn generate_covmap_record<'ll>(cx: &mut CodegenCx<'ll, '_>, version: u32, filena cx.const_u32(0), // (unused) cx.const_u32(filenames_buffer.len() as u32), cx.const_u32(0), // (unused) - cx.const_u32(version), + cx.const_u32(version.to_u32()), ], /* packed */ false, ); diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs index b704cf2b1cd..e0da8d36876 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs @@ -27,6 +27,9 @@ use crate::llvm; /// the final record that will be embedded in the `__llvm_covfun` section. #[derive(Debug)] pub(crate) struct CovfunRecord<'tcx> { + /// Not used directly, but helpful in debug messages. + _instance: Instance<'tcx>, + mangled_function_name: &'tcx str, source_hash: u64, is_used: bool, @@ -55,6 +58,7 @@ pub(crate) fn prepare_covfun_record<'tcx>( let expressions = prepare_expressions(ids_info); let mut covfun = CovfunRecord { + _instance: instance, mangled_function_name: tcx.symbol_name(instance).name, source_hash: if is_used { fn_cov_info.function_source_hash } else { 0 }, is_used, @@ -102,11 +106,21 @@ fn fill_region_tables<'tcx>( ids_info: &'tcx CoverageIdsInfo, covfun: &mut CovfunRecord<'tcx>, ) { + // If this function is unused, replace all counters with zero. + let counter_for_bcb = |bcb: BasicCoverageBlock| -> ffi::Counter { + let term = if covfun.is_used { + ids_info.term_for_bcb[bcb].expect("every BCB in a mapping was given a term") + } else { + CovTerm::Zero + }; + ffi::Counter::from_term(term) + }; + // Currently a function's mappings must all be in the same file, so use the // first mapping's span to determine the file. let source_map = tcx.sess.source_map(); let Some(first_span) = (try { fn_cov_info.mappings.first()?.span }) else { - debug_assert!(false, "function has no mappings: {:?}", covfun.mangled_function_name); + debug_assert!(false, "function has no mappings: {covfun:?}"); return; }; let source_file = source_map.lookup_source_file(first_span.lo()); @@ -117,7 +131,7 @@ fn fill_region_tables<'tcx>( // codegen needs to handle that gracefully to avoid #133606. // It's hard for tests to trigger this organically, so instead we set // `-Zcoverage-options=discard-all-spans-in-codegen` to force it to occur. - let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen(); + let discard_all = tcx.sess.coverage_options().discard_all_spans_in_codegen; let make_coords = |span: Span| { if discard_all { None } else { spans::make_coords(source_map, &source_file, span) } }; @@ -126,23 +140,11 @@ fn fill_region_tables<'tcx>( code_regions, expansion_regions: _, // FIXME(Zalathar): Fill out support for expansion regions branch_regions, - mcdc_branch_regions, - mcdc_decision_regions, } = &mut covfun.regions; // For each counter/region pair in this function+file, convert it to a // form suitable for FFI. for &Mapping { ref kind, span } in &fn_cov_info.mappings { - // If this function is unused, replace all counters with zero. - let counter_for_bcb = |bcb: BasicCoverageBlock| -> ffi::Counter { - let term = if covfun.is_used { - ids_info.term_for_bcb[bcb].expect("every BCB in a mapping was given a term") - } else { - CovTerm::Zero - }; - ffi::Counter::from_term(term) - }; - let Some(coords) = make_coords(span) else { continue }; let cov_span = coords.make_coverage_span(local_file_id); @@ -157,20 +159,6 @@ fn fill_region_tables<'tcx>( false_counter: counter_for_bcb(false_bcb), }); } - MappingKind::MCDCBranch { true_bcb, false_bcb, mcdc_params } => { - mcdc_branch_regions.push(ffi::MCDCBranchRegion { - cov_span, - true_counter: counter_for_bcb(true_bcb), - false_counter: counter_for_bcb(false_bcb), - mcdc_branch_params: ffi::mcdc::BranchParameters::from(mcdc_params), - }); - } - MappingKind::MCDCDecision(mcdc_decision_params) => { - mcdc_decision_regions.push(ffi::MCDCDecisionRegion { - cov_span, - mcdc_decision_params: ffi::mcdc::DecisionParameters::from(mcdc_decision_params), - }); - } } } } @@ -184,6 +172,7 @@ pub(crate) fn generate_covfun_record<'tcx>( covfun: &CovfunRecord<'tcx>, ) { let &CovfunRecord { + _instance, mangled_function_name, source_hash, is_used, diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs index 39a59560c9d..574463be7ff 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs @@ -39,7 +39,10 @@ impl Coords { /// or other expansions), and if it does happen then skipping a span or function is /// better than an ICE or `llvm-cov` failure that the user might have no way to avoid. pub(crate) fn make_coords(source_map: &SourceMap, file: &SourceFile, span: Span) -> Option<Coords> { - let span = ensure_non_empty_span(source_map, span)?; + if span.is_empty() { + debug_assert!(false, "can't make coords from empty span: {span:?}"); + return None; + } let lo = span.lo(); let hi = span.hi(); @@ -70,29 +73,6 @@ pub(crate) fn make_coords(source_map: &SourceMap, file: &SourceFile, span: Span) }) } -fn ensure_non_empty_span(source_map: &SourceMap, span: Span) -> Option<Span> { - if !span.is_empty() { - return Some(span); - } - - // The span is empty, so try to enlarge it to cover an adjacent '{' or '}'. - source_map - .span_to_source(span, |src, start, end| try { - // Adjusting span endpoints by `BytePos(1)` is normally a bug, - // but in this case we have specifically checked that the character - // we're skipping over is one of two specific ASCII characters, so - // adjusting by exactly 1 byte is correct. - if src.as_bytes().get(end).copied() == Some(b'{') { - Some(span.with_hi(span.hi() + BytePos(1))) - } else if start > 0 && src.as_bytes()[start - 1] == b'}' { - Some(span.with_lo(span.lo() - BytePos(1))) - } else { - None - } - }) - .ok()? -} - /// If `llvm-cov` sees a source region that is improperly ordered (end < start), /// it will immediately exit with a fatal error. To prevent that from happening, /// discard regions that are improperly ordered, or might be interpreted in a diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs index eefbd7cf6c4..6a58f495c9d 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs @@ -1,11 +1,10 @@ use std::cell::{OnceCell, RefCell}; use std::ffi::{CStr, CString}; -use rustc_abi::Size; use rustc_codegen_ssa::traits::{ - BuilderMethods, ConstCodegenMethods, CoverageInfoBuilderMethods, MiscCodegenMethods, + ConstCodegenMethods, CoverageInfoBuilderMethods, MiscCodegenMethods, }; -use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; +use rustc_data_structures::fx::FxIndexMap; use rustc_middle::mir::coverage::CoverageKind; use rustc_middle::ty::Instance; use tracing::{debug, instrument}; @@ -20,38 +19,29 @@ mod mapgen; /// Extra per-CGU context/state needed for coverage instrumentation. pub(crate) struct CguCoverageContext<'ll, 'tcx> { - /// Coverage data for each instrumented function identified by DefId. - pub(crate) instances_used: RefCell<FxIndexSet<Instance<'tcx>>>, - pub(crate) pgo_func_name_var_map: RefCell<FxHashMap<Instance<'tcx>, &'ll llvm::Value>>, - pub(crate) mcdc_condition_bitmap_map: RefCell<FxHashMap<Instance<'tcx>, Vec<&'ll llvm::Value>>>, + /// Associates function instances with an LLVM global that holds the + /// function's symbol name, as needed by LLVM coverage intrinsics. + /// + /// Instances in this map are also considered "used" for the purposes of + /// emitting covfun records. Every covfun record holds a hash of its + /// symbol name, and `llvm-cov` will exit fatally if it can't resolve that + /// hash back to an entry in the binary's `__llvm_prf_names` linker section. + pub(crate) pgo_func_name_var_map: RefCell<FxIndexMap<Instance<'tcx>, &'ll llvm::Value>>, covfun_section_name: OnceCell<CString>, } impl<'ll, 'tcx> CguCoverageContext<'ll, 'tcx> { pub(crate) fn new() -> Self { - Self { - instances_used: RefCell::<FxIndexSet<_>>::default(), - pgo_func_name_var_map: Default::default(), - mcdc_condition_bitmap_map: Default::default(), - covfun_section_name: Default::default(), - } + Self { pgo_func_name_var_map: Default::default(), covfun_section_name: Default::default() } } - /// LLVM use a temp value to record evaluated mcdc test vector of each decision, which is - /// called condition bitmap. In order to handle nested decisions, several condition bitmaps can - /// be allocated for a function body. These values are named `mcdc.addr.{i}` and are a 32-bit - /// integers. They respectively hold the condition bitmaps for decisions with a depth of `i`. - fn try_get_mcdc_condition_bitmap( - &self, - instance: &Instance<'tcx>, - decision_depth: u16, - ) -> Option<&'ll llvm::Value> { - self.mcdc_condition_bitmap_map - .borrow() - .get(instance) - .and_then(|bitmap_map| bitmap_map.get(decision_depth as usize)) - .copied() // Dereference Option<&&Value> to Option<&Value> + /// Returns the list of instances considered "used" in this CGU, as + /// inferred from the keys of `pgo_func_name_var_map`. + pub(crate) fn instances_used(&self) -> Vec<Instance<'tcx>> { + // Collecting into a Vec is way easier than trying to juggle RefCell + // projections, and this should only run once per CGU anyway. + self.pgo_func_name_var_map.borrow().keys().copied().collect::<Vec<_>>() } } @@ -78,7 +68,10 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { /// string, to hold the function name passed to LLVM intrinsic /// `instrprof.increment()`. The `Value` is only created once per instance. /// Multiple invocations with the same instance return the same `Value`. - fn get_pgo_func_name_var(&self, instance: Instance<'tcx>) -> &'ll llvm::Value { + /// + /// This has the side-effect of causing coverage codegen to consider this + /// function "used", making it eligible to emit an associated covfun record. + fn ensure_pgo_func_name_var(&self, instance: Instance<'tcx>) -> &'ll llvm::Value { debug!("getting pgo_func_name_var for instance={:?}", instance); let mut pgo_func_name_var_map = self.coverage_cx().pgo_func_name_var_map.borrow_mut(); pgo_func_name_var_map.entry(instance).or_insert_with(|| { @@ -90,38 +83,6 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { } impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { - fn init_coverage(&mut self, instance: Instance<'tcx>) { - let Some(function_coverage_info) = - self.tcx.instance_mir(instance.def).function_coverage_info.as_deref() - else { - return; - }; - - // If there are no MC/DC bitmaps to set up, return immediately. - if function_coverage_info.mcdc_bitmap_bits == 0 { - return; - } - - let fn_name = self.get_pgo_func_name_var(instance); - let hash = self.const_u64(function_coverage_info.function_source_hash); - let bitmap_bits = self.const_u32(function_coverage_info.mcdc_bitmap_bits as u32); - self.mcdc_parameters(fn_name, hash, bitmap_bits); - - // Create pointers named `mcdc.addr.{i}` to stack-allocated condition bitmaps. - let mut cond_bitmaps = vec![]; - for i in 0..function_coverage_info.mcdc_num_condition_bitmaps { - // MC/DC intrinsics will perform loads/stores that use the ABI default - // alignment for i32, so our variable declaration should match. - let align = self.tcx.data_layout.i32_align.abi; - let cond_bitmap = self.alloca(Size::from_bytes(4), align); - llvm::set_value_name(cond_bitmap, format!("mcdc.addr.{i}").as_bytes()); - self.store(self.const_i32(0), cond_bitmap, align); - cond_bitmaps.push(cond_bitmap); - } - - self.coverage_cx().mcdc_condition_bitmap_map.borrow_mut().insert(instance, cond_bitmaps); - } - #[instrument(level = "debug", skip(self))] fn add_coverage(&mut self, instance: Instance<'tcx>, kind: &CoverageKind) { // Our caller should have already taken care of inlining subtleties, @@ -138,7 +99,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { // When that happens, we currently just discard those statements, so // the corresponding code will be undercounted. // FIXME(Zalathar): Find a better solution for mixed-coverage builds. - let Some(coverage_cx) = &bx.cx.coverage_cx else { return }; + let Some(_coverage_cx) = &bx.cx.coverage_cx else { return }; let Some(function_coverage_info) = bx.tcx.instance_mir(instance.def).function_coverage_info.as_deref() @@ -151,11 +112,6 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { return; }; - // Mark the instance as used in this CGU, for coverage purposes. - // This includes functions that were not partitioned into this CGU, - // but were MIR-inlined into one of this CGU's functions. - coverage_cx.instances_used.borrow_mut().insert(instance); - match *kind { CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!( "marker statement {kind:?} should have been removed by CleanupPostBorrowck" @@ -163,7 +119,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { CoverageKind::VirtualCounter { bcb } if let Some(&id) = ids_info.phys_counter_for_node.get(&bcb) => { - let fn_name = bx.get_pgo_func_name_var(instance); + let fn_name = bx.ensure_pgo_func_name_var(instance); let hash = bx.const_u64(function_coverage_info.function_source_hash); let num_counters = bx.const_u32(ids_info.num_counters); let index = bx.const_u32(id.as_u32()); @@ -175,30 +131,6 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { } // If a BCB doesn't have an associated physical counter, there's nothing to codegen. CoverageKind::VirtualCounter { .. } => {} - CoverageKind::CondBitmapUpdate { index, decision_depth } => { - let cond_bitmap = coverage_cx - .try_get_mcdc_condition_bitmap(&instance, decision_depth) - .expect("mcdc cond bitmap should have been allocated for updating"); - let cond_index = bx.const_i32(index as i32); - bx.mcdc_condbitmap_update(cond_index, cond_bitmap); - } - CoverageKind::TestVectorBitmapUpdate { bitmap_idx, decision_depth } => { - let cond_bitmap = - coverage_cx.try_get_mcdc_condition_bitmap(&instance, decision_depth).expect( - "mcdc cond bitmap should have been allocated for merging \ - into the global bitmap", - ); - assert!( - bitmap_idx as usize <= function_coverage_info.mcdc_bitmap_bits, - "bitmap index of the decision out of range" - ); - - let fn_name = bx.get_pgo_func_name_var(instance); - let hash = bx.const_u64(function_coverage_info.function_source_hash); - let bitmap_index = bx.const_u32(bitmap_idx); - bx.mcdc_tvbitmap_update(fn_name, hash, bitmap_index, cond_bitmap); - bx.mcdc_condbitmap_reset(cond_bitmap); - } } } } diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs index 8f0948b8183..6eb7042da61 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs @@ -1,13 +1,11 @@ // .debug_gdb_scripts binary section. -use rustc_ast::attr; use rustc_codegen_ssa::base::collect_debugger_visualizers_transitive; use rustc_codegen_ssa::traits::*; use rustc_hir::def_id::LOCAL_CRATE; use rustc_middle::bug; use rustc_middle::middle::debugger_visualizer::DebuggerVisualizerType; use rustc_session::config::{CrateType, DebugInfo}; -use rustc_span::sym; use crate::builder::Builder; use crate::common::CodegenCx; @@ -75,7 +73,7 @@ pub(crate) fn get_or_insert_gdb_debug_scripts_section_global<'ll>( llvm::set_section(section_var, c".debug_gdb_scripts"); llvm::set_initializer(section_var, cx.const_bytes(section_contents)); llvm::LLVMSetGlobalConstant(section_var, llvm::True); - llvm::LLVMSetUnnamedAddress(section_var, llvm::UnnamedAddr::Global); + llvm::set_unnamed_address(section_var, llvm::UnnamedAddr::Global); llvm::set_linkage(section_var, llvm::Linkage::LinkOnceODRLinkage); // This should make sure that the whole section is not larger than // the string it contains. Otherwise we get a warning from GDB. @@ -86,9 +84,6 @@ pub(crate) fn get_or_insert_gdb_debug_scripts_section_global<'ll>( } pub(crate) fn needs_gdb_debug_scripts_section(cx: &CodegenCx<'_, '_>) -> bool { - let omit_gdb_pretty_printer_section = - attr::contains_name(cx.tcx.hir_krate_attrs(), sym::omit_gdb_pretty_printer_section); - // To ensure the section `__rustc_debug_gdb_scripts_section__` will not create // ODR violations at link time, this section will not be emitted for rlibs since // each rlib could produce a different set of visualizers that would be embedded @@ -117,8 +112,7 @@ pub(crate) fn needs_gdb_debug_scripts_section(cx: &CodegenCx<'_, '_>) -> bool { } }); - !omit_gdb_pretty_printer_section - && cx.sess().opts.debuginfo != DebugInfo::None + cx.sess().opts.debuginfo != DebugInfo::None && cx.sess().target.emit_debug_gdb_scripts && embed_visualizers } diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs index 7f3e486ca31..0e9dbfba658 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs @@ -5,7 +5,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use std::{iter, ptr}; -use libc::{c_char, c_longlong, c_uint}; +use libc::{c_longlong, c_uint}; use rustc_abi::{Align, Size}; use rustc_codegen_ssa::debuginfo::type_names::{VTableNameKind, cpp_like_debuginfo}; use rustc_codegen_ssa::traits::*; @@ -159,13 +159,15 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>( return_if_di_node_created_in_meantime!(cx, unique_type_id); let data_layout = &cx.tcx.data_layout; + let pointer_size = data_layout.pointer_size(); + let pointer_align = data_layout.pointer_align(); let ptr_type_debuginfo_name = compute_debuginfo_type_name(cx.tcx, ptr_type, true); match wide_pointer_kind(cx, pointee_type) { None => { // This is a thin pointer. Create a regular pointer type and give it the correct name. assert_eq!( - (data_layout.pointer_size, data_layout.pointer_align.abi), + (pointer_size, pointer_align.abi), cx.size_and_align_of(ptr_type), "ptr_type={ptr_type}, pointee_type={pointee_type}", ); @@ -174,8 +176,8 @@ fn build_pointer_or_reference_di_node<'ll, 'tcx>( llvm::LLVMRustDIBuilderCreatePointerType( DIB(cx), pointee_type_di_node, - data_layout.pointer_size.bits(), - data_layout.pointer_align.abi.bits() as u32, + pointer_size.bits(), + pointer_align.abi.bits() as u32, 0, // Ignore DWARF address space. ptr_type_debuginfo_name.as_c_char_ptr(), ptr_type_debuginfo_name.len(), @@ -319,7 +321,9 @@ fn build_subroutine_type_di_node<'ll, 'tcx>( let name = compute_debuginfo_type_name(cx.tcx, fn_ty, false); let (size, align) = match fn_ty.kind() { ty::FnDef(..) => (Size::ZERO, Align::ONE), - ty::FnPtr(..) => (cx.tcx.data_layout.pointer_size, cx.tcx.data_layout.pointer_align.abi), + ty::FnPtr(..) => { + (cx.tcx.data_layout.pointer_size(), cx.tcx.data_layout.pointer_align().abi) + } _ => unreachable!(), }; let di_node = unsafe { @@ -504,7 +508,7 @@ fn recursion_marker_type_di_node<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>) -> &'ll D create_basic_type( cx, "<recur_type>", - cx.tcx.data_layout.pointer_size, + cx.tcx.data_layout.pointer_size(), dwarf_const::DW_ATE_unsigned, ) }) @@ -1578,13 +1582,9 @@ pub(crate) fn apply_vcall_visibility_metadata<'ll, 'tcx>( }; let trait_ref_typeid = typeid_for_trait_ref(cx.tcx, trait_ref); + let typeid = cx.create_metadata(trait_ref_typeid.as_bytes()); unsafe { - let typeid = llvm::LLVMMDStringInContext2( - cx.llcx, - trait_ref_typeid.as_ptr() as *const c_char, - trait_ref_typeid.as_bytes().len(), - ); let v = [llvm::LLVMValueAsMetadata(cx.const_usize(0)), typeid]; llvm::LLVMRustGlobalAddMetadata( vtable, @@ -1626,7 +1626,7 @@ pub(crate) fn create_vtable_di_node<'ll, 'tcx>( // When full debuginfo is enabled, we want to try and prevent vtables from being // merged. Otherwise debuggers will have a hard time mapping from dyn pointer // to concrete type. - llvm::SetUnnamedAddress(vtable, llvm::UnnamedAddr::No); + llvm::set_unnamed_address(vtable, llvm::UnnamedAddr::No); let vtable_name = compute_debuginfo_vtable_name(cx.tcx, ty, poly_trait_ref, VTableNameKind::GlobalVariable); diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs index 56fb12d3c22..d1502d2b1e6 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs @@ -285,8 +285,8 @@ pub(super) fn build_type_with_children<'ll, 'tcx>( // Item(T), // } // ``` - let is_expanding_recursive = - debug_context(cx).adt_stack.borrow().iter().any(|(parent_def_id, parent_args)| { + let is_expanding_recursive = adt_def.is_enum() + && debug_context(cx).adt_stack.borrow().iter().any(|(parent_def_id, parent_args)| { if def_id == *parent_def_id { args.iter().zip(parent_args.iter()).any(|(arg, parent_arg)| { if let (Some(arg), Some(parent_arg)) = (arg.as_type(), parent_arg.as_type()) diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs index 5ca2505cec4..6cbf2dbf7d3 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs @@ -533,7 +533,7 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { // First, let's see if this is a method within an inherent impl. Because // if yes, we want to make the result subroutine DIE a child of the // subroutine's self-type. - if let Some(impl_def_id) = cx.tcx.impl_of_method(instance.def_id()) { + if let Some(impl_def_id) = cx.tcx.impl_of_assoc(instance.def_id()) { // If the method does *not* belong to a trait, proceed if cx.tcx.trait_id_of_impl(impl_def_id).is_none() { let impl_self_ty = cx.tcx.instantiate_and_normalize_erasing_regions( diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs index 2419ec1f888..960a895a203 100644 --- a/compiler/rustc_codegen_llvm/src/declare.rs +++ b/compiler/rustc_codegen_llvm/src/declare.rs @@ -49,7 +49,7 @@ pub(crate) fn declare_simple_fn<'ll>( }; llvm::SetFunctionCallConv(llfn, callconv); - llvm::SetUnnamedAddress(llfn, unnamed); + llvm::set_unnamed_address(llfn, unnamed); llvm::set_visibility(llfn, visibility); llfn @@ -176,7 +176,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { { let typeid = cfi::typeid_for_instance(self.tcx, instance, options); if typeids.insert(typeid.clone()) { - self.add_type_metadata(llfn, typeid); + self.add_type_metadata(llfn, typeid.as_bytes()); } } } else { @@ -189,7 +189,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { .map(cfi::TypeIdOptions::from_iter) { let typeid = cfi::typeid_for_fnabi(self.tcx, fn_abi, options); - self.add_type_metadata(llfn, typeid); + self.add_type_metadata(llfn, typeid.as_bytes()); } } } @@ -215,7 +215,9 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { llfn } +} +impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> { /// Declare a global with an intention to define it. /// /// Use this function when you intend to define a global. This function will @@ -234,13 +236,13 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { /// /// Use this function when you intend to define a global without a name. pub(crate) fn define_private_global(&self, ty: &'ll Type) -> &'ll Value { - unsafe { llvm::LLVMRustInsertPrivateGlobal(self.llmod, ty) } + unsafe { llvm::LLVMRustInsertPrivateGlobal(self.llmod(), ty) } } /// Gets declared value by name. pub(crate) fn get_declared_value(&self, name: &str) -> Option<&'ll Value> { debug!("get_declared_value(name={:?})", name); - unsafe { llvm::LLVMRustGetNamedValue(self.llmod, name.as_c_char_ptr(), name.len()) } + unsafe { llvm::LLVMRustGetNamedValue(self.llmod(), name.as_c_char_ptr(), name.len()) } } /// Gets defined or externally defined (AvailableExternally linkage) value by diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs index 8bc74fbec7e..627b0c9ff3b 100644 --- a/compiler/rustc_codegen_llvm/src/errors.rs +++ b/compiler/rustc_codegen_llvm/src/errors.rs @@ -3,36 +3,12 @@ use std::path::Path; use rustc_data_structures::small_c_str::SmallCStr; use rustc_errors::{Diag, DiagCtxtHandle, Diagnostic, EmissionGuarantee, Level}; -use rustc_macros::{Diagnostic, Subdiagnostic}; +use rustc_macros::Diagnostic; use rustc_span::Span; use crate::fluent_generated as fluent; #[derive(Diagnostic)] -#[diag(codegen_llvm_unknown_ctarget_feature_prefix)] -#[note] -pub(crate) struct UnknownCTargetFeaturePrefix<'a> { - pub feature: &'a str, -} - -#[derive(Diagnostic)] -#[diag(codegen_llvm_unknown_ctarget_feature)] -#[note] -pub(crate) struct UnknownCTargetFeature<'a> { - pub feature: &'a str, - #[subdiagnostic] - pub rust_feature: PossibleFeature<'a>, -} - -#[derive(Subdiagnostic)] -pub(crate) enum PossibleFeature<'a> { - #[help(codegen_llvm_possible_feature)] - Some { rust_feature: &'a str }, - #[help(codegen_llvm_consider_filing_feature_request)] - None, -} - -#[derive(Diagnostic)] #[diag(codegen_llvm_symbol_already_defined)] pub(crate) struct SymbolAlreadyDefined<'a> { #[primary_span] @@ -44,11 +20,6 @@ pub(crate) struct SymbolAlreadyDefined<'a> { #[diag(codegen_llvm_sanitizer_memtag_requires_mte)] pub(crate) struct SanitizerMemtagRequiresMte; -#[derive(Diagnostic)] -#[diag(codegen_llvm_dynamic_linking_with_lto)] -#[note] -pub(crate) struct DynamicLinkingWithLTO; - pub(crate) struct ParseTargetMachineConfig<'a>(pub LlvmError<'a>); impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> { @@ -62,29 +33,13 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> { } #[derive(Diagnostic)] -#[diag(codegen_llvm_autodiff_without_lto)] -pub(crate) struct AutoDiffWithoutLTO; - -#[derive(Diagnostic)] #[diag(codegen_llvm_autodiff_without_enable)] pub(crate) struct AutoDiffWithoutEnable; #[derive(Diagnostic)] -#[diag(codegen_llvm_lto_disallowed)] -pub(crate) struct LtoDisallowed; - -#[derive(Diagnostic)] -#[diag(codegen_llvm_lto_dylib)] -pub(crate) struct LtoDylib; - -#[derive(Diagnostic)] -#[diag(codegen_llvm_lto_proc_macro)] -pub(crate) struct LtoProcMacro; - -#[derive(Diagnostic)] #[diag(codegen_llvm_lto_bitcode_from_rlib)] pub(crate) struct LtoBitcodeFromRlib { - pub llvm_err: String, + pub err: String, } #[derive(Diagnostic)] diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index f7f062849a8..7b27e496986 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -103,23 +103,25 @@ fn call_simple_intrinsic<'ll, 'tcx>( sym::minnumf64 => ("llvm.minnum", &[bx.type_f64()]), sym::minnumf128 => ("llvm.minnum", &[bx.type_f128()]), - sym::minimumf16 => ("llvm.minimum", &[bx.type_f16()]), - sym::minimumf32 => ("llvm.minimum", &[bx.type_f32()]), - sym::minimumf64 => ("llvm.minimum", &[bx.type_f64()]), - // There are issues on x86_64 and aarch64 with the f128 variant, - // let's instead use the instrinsic fallback body. - // sym::minimumf128 => ("llvm.minimum", &[cx.type_f128()]), + // FIXME: LLVM currently mis-compile those intrinsics, re-enable them + // when llvm/llvm-project#{139380,139381,140445} are fixed. + //sym::minimumf16 => ("llvm.minimum", &[bx.type_f16()]), + //sym::minimumf32 => ("llvm.minimum", &[bx.type_f32()]), + //sym::minimumf64 => ("llvm.minimum", &[bx.type_f64()]), + //sym::minimumf128 => ("llvm.minimum", &[cx.type_f128()]), + // sym::maxnumf16 => ("llvm.maxnum", &[bx.type_f16()]), sym::maxnumf32 => ("llvm.maxnum", &[bx.type_f32()]), sym::maxnumf64 => ("llvm.maxnum", &[bx.type_f64()]), sym::maxnumf128 => ("llvm.maxnum", &[bx.type_f128()]), - sym::maximumf16 => ("llvm.maximum", &[bx.type_f16()]), - sym::maximumf32 => ("llvm.maximum", &[bx.type_f32()]), - sym::maximumf64 => ("llvm.maximum", &[bx.type_f64()]), - // There are issues on x86_64 and aarch64 with the f128 variant, - // let's instead use the instrinsic fallback body. - // sym::maximumf128 => ("llvm.maximum", &[cx.type_f128()]), + // FIXME: LLVM currently mis-compile those intrinsics, re-enable them + // when llvm/llvm-project#{139380,139381,140445} are fixed. + //sym::maximumf16 => ("llvm.maximum", &[bx.type_f16()]), + //sym::maximumf32 => ("llvm.maximum", &[bx.type_f32()]), + //sym::maximumf64 => ("llvm.maximum", &[bx.type_f64()]), + //sym::maximumf128 => ("llvm.maximum", &[cx.type_f128()]), + // sym::copysignf16 => ("llvm.copysign", &[bx.type_f16()]), sym::copysignf32 => ("llvm.copysign", &[bx.type_f32()]), sym::copysignf64 => ("llvm.copysign", &[bx.type_f64()]), @@ -380,26 +382,16 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { let width = size.bits(); let llty = self.type_ix(width); match name { - sym::ctlz | sym::cttz => { - let y = self.const_bool(false); - let ret = self.call_intrinsic( - format!("llvm.{name}"), - &[llty], - &[args[0].immediate(), y], - ); - - self.intcast(ret, result.layout.llvm_type(self), false) - } - sym::ctlz_nonzero => { - let y = self.const_bool(true); - let ret = - self.call_intrinsic("llvm.ctlz", &[llty], &[args[0].immediate(), y]); - self.intcast(ret, result.layout.llvm_type(self), false) - } - sym::cttz_nonzero => { - let y = self.const_bool(true); + sym::ctlz | sym::ctlz_nonzero | sym::cttz | sym::cttz_nonzero => { + let y = + self.const_bool(name == sym::ctlz_nonzero || name == sym::cttz_nonzero); + let llvm_name = if name == sym::ctlz || name == sym::ctlz_nonzero { + "llvm.ctlz" + } else { + "llvm.cttz" + }; let ret = - self.call_intrinsic("llvm.cttz", &[llty], &[args[0].immediate(), y]); + self.call_intrinsic(llvm_name, &[llty], &[args[0].immediate(), y]); self.intcast(ret, result.layout.llvm_type(self), false) } sym::ctpop => { @@ -456,7 +448,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { // For rusty ABIs, small aggregates are actually passed // as `RegKind::Integer` (see `FnAbi::adjust_for_abi`), // so we re-use that same threshold here. - layout.size() <= self.data_layout().pointer_size * 2 + layout.size() <= self.data_layout().pointer_size() * 2 } }; @@ -756,8 +748,8 @@ fn codegen_msvc_try<'ll, 'tcx>( // } // // More information can be found in libstd's seh.rs implementation. - let ptr_size = bx.tcx().data_layout.pointer_size; - let ptr_align = bx.tcx().data_layout.pointer_align.abi; + let ptr_size = bx.tcx().data_layout.pointer_size(); + let ptr_align = bx.tcx().data_layout.pointer_align().abi; let slot = bx.alloca(ptr_size, ptr_align); let try_func_ty = bx.type_func(&[bx.type_ptr()], bx.type_void()); bx.invoke(try_func_ty, None, None, try_func, &[data], normal, catchswitch, None, None); @@ -1029,8 +1021,8 @@ fn codegen_emcc_try<'ll, 'tcx>( // We need to pass two values to catch_func (ptr and is_rust_panic), so // create an alloca and pass a pointer to that. - let ptr_size = bx.tcx().data_layout.pointer_size; - let ptr_align = bx.tcx().data_layout.pointer_align.abi; + let ptr_size = bx.tcx().data_layout.pointer_size(); + let ptr_align = bx.tcx().data_layout.pointer_align().abi; let i8_align = bx.tcx().data_layout.i8_align.abi; // Required in order for there to be no padding between the fields. assert!(i8_align <= ptr_align); @@ -1156,9 +1148,11 @@ fn generic_simd_intrinsic<'ll, 'tcx>( macro_rules! require_int_or_uint_ty { ($ty: expr, $diag: expr) => { match $ty { - ty::Int(i) => i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()), + ty::Int(i) => { + i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits()) + } ty::Uint(i) => { - i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()) + i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits()) } _ => { return_error!($diag); @@ -1537,6 +1531,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>( sym::simd_fsin => "llvm.sin", sym::simd_fsqrt => "llvm.sqrt", sym::simd_round => "llvm.round", + sym::simd_round_ties_even => "llvm.rint", sym::simd_trunc => "llvm.trunc", _ => return_error!(InvalidMonomorphization::UnrecognizedIntrinsic { span, name }), }; @@ -1563,6 +1558,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>( | sym::simd_fsqrt | sym::simd_relaxed_fma | sym::simd_round + | sym::simd_round_ties_even | sym::simd_trunc ) { return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args); @@ -2010,10 +2006,10 @@ fn generic_simd_intrinsic<'ll, 'tcx>( } else { let bitwidth = match in_elem.kind() { ty::Int(i) => { - i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()) + i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits()) } ty::Uint(i) => { - i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size.bits()) + i.bit_width().unwrap_or_else(|| bx.data_layout().pointer_size().bits()) } _ => return_error!(InvalidMonomorphization::UnsupportedSymbol { span, @@ -2309,7 +2305,13 @@ fn generic_simd_intrinsic<'ll, 'tcx>( // Unary integer intrinsics if matches!( name, - sym::simd_bswap | sym::simd_bitreverse | sym::simd_ctlz | sym::simd_ctpop | sym::simd_cttz + sym::simd_bswap + | sym::simd_bitreverse + | sym::simd_ctlz + | sym::simd_ctpop + | sym::simd_cttz + | sym::simd_funnel_shl + | sym::simd_funnel_shr ) { let vec_ty = bx.cx.type_vector( match *in_elem.kind() { @@ -2330,6 +2332,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>( sym::simd_ctlz => "llvm.ctlz", sym::simd_ctpop => "llvm.ctpop", sym::simd_cttz => "llvm.cttz", + sym::simd_funnel_shl => "llvm.fshl", + sym::simd_funnel_shr => "llvm.fshr", _ => unreachable!(), }; let int_size = in_elem.int_size_and_signed(bx.tcx()).0.bits(); @@ -2350,6 +2354,11 @@ fn generic_simd_intrinsic<'ll, 'tcx>( // simple unary argument cases Ok(bx.call_intrinsic(llvm_intrinsic, &[vec_ty], &[args[0].immediate()])) } + sym::simd_funnel_shl | sym::simd_funnel_shr => Ok(bx.call_intrinsic( + llvm_intrinsic, + &[vec_ty], + &[args[0].immediate(), args[1].immediate(), args[2].immediate()], + )), _ => unreachable!(), }; } diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index cdfffbe47bf..ca84b6de8b1 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -22,15 +22,16 @@ use std::any::Any; use std::ffi::CStr; use std::mem::ManuallyDrop; +use std::path::PathBuf; use back::owned_target_machine::OwnedTargetMachine; use back::write::{create_informational_target_machine, create_target_machine}; use context::SimpleCx; -use errors::{AutoDiffWithoutLTO, ParseTargetMachineConfig}; +use errors::ParseTargetMachineConfig; use llvm_util::target_config; use rustc_ast::expand::allocator::AllocatorKind; use rustc_ast::expand::autodiff_attrs::AutoDiffItem; -use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule}; +use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule}; use rustc_codegen_ssa::back::write::{ CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn, }; @@ -43,7 +44,7 @@ use rustc_middle::dep_graph::{WorkProduct, WorkProductId}; use rustc_middle::ty::TyCtxt; use rustc_middle::util::Providers; use rustc_session::Session; -use rustc_session::config::{Lto, OptLevel, OutputFilenames, PrintKind, PrintRequest}; +use rustc_session::config::{OptLevel, OutputFilenames, PrintKind, PrintRequest}; use rustc_span::Symbol; mod back { @@ -113,7 +114,7 @@ impl ExtraBackendMethods for LlvmCodegenBackend { ) -> ModuleLlvm { let module_llvm = ModuleLlvm::new_metadata(tcx, module_name); let cx = - SimpleCx::new(module_llvm.llmod(), &module_llvm.llcx, tcx.data_layout.pointer_size); + SimpleCx::new(module_llvm.llmod(), &module_llvm.llcx, tcx.data_layout.pointer_size()); unsafe { allocator::codegen(tcx, cx, module_name, kind, alloc_error_handler_kind); } @@ -167,26 +168,40 @@ impl WriteBackendMethods for LlvmCodegenBackend { let stats = llvm::build_string(|s| unsafe { llvm::LLVMRustPrintStatistics(s) }).unwrap(); print!("{stats}"); } - fn run_link( - cgcx: &CodegenContext<Self>, - dcx: DiagCtxtHandle<'_>, - modules: Vec<ModuleCodegen<Self::Module>>, - ) -> Result<ModuleCodegen<Self::Module>, FatalError> { - back::write::link(cgcx, dcx, modules) - } - fn run_fat_lto( + fn run_and_optimize_fat_lto( cgcx: &CodegenContext<Self>, + exported_symbols_for_lto: &[String], + each_linked_rlib_for_lto: &[PathBuf], modules: Vec<FatLtoInput<Self>>, - cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>, - ) -> Result<LtoModuleCodegen<Self>, FatalError> { - back::lto::run_fat(cgcx, modules, cached_modules) + diff_fncs: Vec<AutoDiffItem>, + ) -> Result<ModuleCodegen<Self::Module>, FatalError> { + let mut module = + back::lto::run_fat(cgcx, exported_symbols_for_lto, each_linked_rlib_for_lto, modules)?; + + if !diff_fncs.is_empty() { + builder::autodiff::differentiate(&module, cgcx, diff_fncs)?; + } + + let dcx = cgcx.create_dcx(); + let dcx = dcx.handle(); + back::lto::run_pass_manager(cgcx, dcx, &mut module, false)?; + + Ok(module) } fn run_thin_lto( cgcx: &CodegenContext<Self>, + exported_symbols_for_lto: &[String], + each_linked_rlib_for_lto: &[PathBuf], modules: Vec<(String, Self::ThinBuffer)>, cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>, - ) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> { - back::lto::run_thin(cgcx, modules, cached_modules) + ) -> Result<(Vec<ThinModule<Self>>, Vec<WorkProduct>), FatalError> { + back::lto::run_thin( + cgcx, + exported_symbols_for_lto, + each_linked_rlib_for_lto, + modules, + cached_modules, + ) } fn optimize( cgcx: &CodegenContext<Self>, @@ -196,14 +211,6 @@ impl WriteBackendMethods for LlvmCodegenBackend { ) -> Result<(), FatalError> { back::write::optimize(cgcx, dcx, module, config) } - fn optimize_fat( - cgcx: &CodegenContext<Self>, - module: &mut ModuleCodegen<Self::Module>, - ) -> Result<(), FatalError> { - let dcx = cgcx.create_dcx(); - let dcx = dcx.handle(); - back::lto::run_pass_manager(cgcx, dcx, module, false) - } fn optimize_thin( cgcx: &CodegenContext<Self>, thin: ThinModule<Self>, @@ -212,11 +219,10 @@ impl WriteBackendMethods for LlvmCodegenBackend { } fn codegen( cgcx: &CodegenContext<Self>, - dcx: DiagCtxtHandle<'_>, module: ModuleCodegen<Self::Module>, config: &ModuleConfig, ) -> Result<CompiledModule, FatalError> { - back::write::codegen(cgcx, dcx, module, config) + back::write::codegen(cgcx, module, config) } fn prepare_thin( module: ModuleCodegen<Self::Module>, @@ -227,19 +233,6 @@ impl WriteBackendMethods for LlvmCodegenBackend { fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) { (module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod())) } - /// Generate autodiff rules - fn autodiff( - cgcx: &CodegenContext<Self>, - module: &ModuleCodegen<Self::Module>, - diff_fncs: Vec<AutoDiffItem>, - config: &ModuleConfig, - ) -> Result<(), FatalError> { - if cgcx.lto != Lto::Fat { - let dcx = cgcx.create_dcx(); - return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutLTO)); - } - builder::autodiff::differentiate(module, cgcx, diff_fncs, config) - } } impl LlvmCodegenBackend { @@ -423,6 +416,20 @@ impl ModuleLlvm { } } + fn tm_from_cgcx( + cgcx: &CodegenContext<LlvmCodegenBackend>, + name: &str, + dcx: DiagCtxtHandle<'_>, + ) -> Result<OwnedTargetMachine, FatalError> { + let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, name); + match (cgcx.tm_factory)(tm_factory_config) { + Ok(m) => Ok(m), + Err(e) => { + return Err(dcx.emit_almost_fatal(ParseTargetMachineConfig(e))); + } + } + } + fn parse( cgcx: &CodegenContext<LlvmCodegenBackend>, name: &CStr, @@ -432,13 +439,7 @@ impl ModuleLlvm { unsafe { let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names); let llmod_raw = back::lto::parse_module(llcx, name, buffer, dcx)?; - let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, name.to_str().unwrap()); - let tm = match (cgcx.tm_factory)(tm_factory_config) { - Ok(m) => m, - Err(e) => { - return Err(dcx.emit_almost_fatal(ParseTargetMachineConfig(e))); - } - }; + let tm = ModuleLlvm::tm_from_cgcx(cgcx, name.to_str().unwrap(), dcx)?; Ok(ModuleLlvm { llmod_raw, llcx, tm: ManuallyDrop::new(tm) }) } diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs index 2ad39fc8538..56d756e52cc 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs @@ -1,11 +1,10 @@ -#![allow(non_camel_case_types)] #![expect(dead_code)] use libc::{c_char, c_uint}; use super::MetadataKindId; use super::ffi::{AttributeKind, BasicBlock, Metadata, Module, Type, Value}; -use crate::llvm::Bool; +use crate::llvm::{Bool, Builder}; #[link(name = "llvm-wrapper", kind = "static")] unsafe extern "C" { @@ -32,6 +31,14 @@ unsafe extern "C" { index: c_uint, kind: AttributeKind, ); + pub(crate) fn LLVMRustPositionBefore<'a>(B: &'a Builder<'_>, I: &'a Value); + pub(crate) fn LLVMRustPositionAfter<'a>(B: &'a Builder<'_>, I: &'a Value); + pub(crate) fn LLVMRustGetFunctionCall( + F: &Value, + name: *const c_char, + NameLen: libc::size_t, + ) -> Option<&Value>; + } unsafe extern "C" { @@ -40,7 +47,7 @@ unsafe extern "C" { pub(crate) fn LLVMDumpValue(V: &Value); pub(crate) fn LLVMGetFunctionCallConv(F: &Value) -> c_uint; pub(crate) fn LLVMGetReturnType(T: &Type) -> &Type; - pub(crate) fn LLVMGetParams(Fnc: &Value, parms: *mut &Value); + pub(crate) fn LLVMGetParams(Fnc: &Value, params: *mut &Value); pub(crate) fn LLVMGetNamedFunction(M: &Module, Name: *const c_char) -> Option<&Value>; } @@ -57,14 +64,19 @@ pub(crate) use self::Enzyme_AD::*; #[cfg(llvm_enzyme)] pub(crate) mod Enzyme_AD { + use std::ffi::{CString, c_char}; + use libc::c_void; + unsafe extern "C" { pub(crate) fn EnzymeSetCLBool(arg1: *mut ::std::os::raw::c_void, arg2: u8); + pub(crate) fn EnzymeSetCLString(arg1: *mut ::std::os::raw::c_void, arg2: *const c_char); } unsafe extern "C" { static mut EnzymePrintPerf: c_void; static mut EnzymePrintActivity: c_void; static mut EnzymePrintType: c_void; + static mut EnzymeFunctionToAnalyze: c_void; static mut EnzymePrint: c_void; static mut EnzymeStrictAliasing: c_void; static mut looseTypeAnalysis: c_void; @@ -86,6 +98,15 @@ pub(crate) mod Enzyme_AD { EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintType), print as u8); } } + pub(crate) fn set_print_type_fun(fun_name: &str) { + let c_fun_name = CString::new(fun_name).unwrap(); + unsafe { + EnzymeSetCLString( + std::ptr::addr_of_mut!(EnzymeFunctionToAnalyze), + c_fun_name.as_ptr() as *const c_char, + ); + } + } pub(crate) fn set_print(print: bool) { unsafe { EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrint), print as u8); @@ -132,6 +153,9 @@ pub(crate) mod Fallback_AD { pub(crate) fn set_print_type(print: bool) { unimplemented!() } + pub(crate) fn set_print_type_fun(fun_name: &str) { + unimplemented!() + } pub(crate) fn set_print(print: bool) { unimplemented!() } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 91ada856d59..ad3c3d5932e 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -97,6 +97,16 @@ pub(crate) enum ModuleFlagMergeBehavior { // Consts for the LLVM CallConv type, pre-cast to usize. +#[derive(Copy, Clone, PartialEq, Debug)] +#[repr(C)] +#[allow(dead_code)] +pub(crate) enum TailCallKind { + None = 0, + Tail = 1, + MustTail = 2, + NoTail = 3, +} + /// LLVM CallingConv::ID. Should we wrap this? /// /// See <https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/IR/CallingConv.h> @@ -239,6 +249,7 @@ pub(crate) enum AttributeKind { FnRetThunkExtern = 41, Writable = 42, DeadOnUnwind = 43, + DeadOnReturn = 44, } /// LLVMIntPredicate @@ -1009,7 +1020,7 @@ unsafe extern "C" { ModuleID: *const c_char, C: &Context, ) -> &Module; - pub(crate) fn LLVMCloneModule(M: &Module) -> &Module; + pub(crate) safe fn LLVMCloneModule(M: &Module) -> &Module; /// Data layout. See Module::getDataLayout. pub(crate) fn LLVMGetDataLayoutStr(M: &Module) -> *const c_char; @@ -1138,6 +1149,11 @@ unsafe extern "C" { Count: c_uint, Packed: Bool, ) -> &'a Value; + pub(crate) fn LLVMConstNamedStruct<'a>( + StructTy: &'a Type, + ConstantVals: *const &'a Value, + Count: c_uint, + ) -> &'a Value; pub(crate) fn LLVMConstVector(ScalarConstantVals: *const &Value, Size: c_uint) -> &Value; // Constant expressions @@ -1168,19 +1184,20 @@ unsafe extern "C" { pub(crate) fn LLVMGlobalGetValueType(Global: &Value) -> &Type; // Operations on global variables - pub(crate) fn LLVMIsAGlobalVariable(GlobalVar: &Value) -> Option<&Value>; + pub(crate) safe fn LLVMIsAGlobalVariable(GlobalVar: &Value) -> Option<&Value>; pub(crate) fn LLVMAddGlobal<'a>(M: &'a Module, Ty: &'a Type, Name: *const c_char) -> &'a Value; pub(crate) fn LLVMGetNamedGlobal(M: &Module, Name: *const c_char) -> Option<&Value>; pub(crate) fn LLVMGetFirstGlobal(M: &Module) -> Option<&Value>; pub(crate) fn LLVMGetNextGlobal(GlobalVar: &Value) -> Option<&Value>; pub(crate) fn LLVMDeleteGlobal(GlobalVar: &Value); - pub(crate) fn LLVMGetInitializer(GlobalVar: &Value) -> Option<&Value>; + pub(crate) safe fn LLVMGetInitializer(GlobalVar: &Value) -> Option<&Value>; pub(crate) fn LLVMSetInitializer<'a>(GlobalVar: &'a Value, ConstantVal: &'a Value); - pub(crate) fn LLVMIsThreadLocal(GlobalVar: &Value) -> Bool; + pub(crate) safe fn LLVMIsThreadLocal(GlobalVar: &Value) -> Bool; pub(crate) fn LLVMSetThreadLocalMode(GlobalVar: &Value, Mode: ThreadLocalMode); - pub(crate) fn LLVMIsGlobalConstant(GlobalVar: &Value) -> Bool; - pub(crate) fn LLVMSetGlobalConstant(GlobalVar: &Value, IsConstant: Bool); + pub(crate) safe fn LLVMIsGlobalConstant(GlobalVar: &Value) -> Bool; + pub(crate) safe fn LLVMSetGlobalConstant(GlobalVar: &Value, IsConstant: Bool); pub(crate) safe fn LLVMSetTailCall(CallInst: &Value, IsTailCall: Bool); + pub(crate) safe fn LLVMRustSetTailCallKind(CallInst: &Value, Kind: TailCallKind); // Operations on attributes pub(crate) fn LLVMCreateStringAttribute( @@ -1217,6 +1234,8 @@ unsafe extern "C" { ) -> &'a BasicBlock; // Operations on instructions + pub(crate) fn LLVMGetInstructionParent(Inst: &Value) -> &BasicBlock; + pub(crate) fn LLVMGetCalledValue(CallInst: &Value) -> Option<&Value>; pub(crate) fn LLVMIsAInstruction(Val: &Value) -> Option<&Value>; pub(crate) fn LLVMGetFirstBasicBlock(Fn: &Value) -> &BasicBlock; pub(crate) fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>; @@ -1492,12 +1511,6 @@ unsafe extern "C" { Ty: &'a Type, Name: *const c_char, ) -> &'a Value; - pub(crate) fn LLVMBuildArrayAlloca<'a>( - B: &Builder<'a>, - Ty: &'a Type, - Val: &'a Value, - Name: *const c_char, - ) -> &'a Value; pub(crate) fn LLVMBuildLoad2<'a>( B: &Builder<'a>, Ty: &'a Type, @@ -1724,7 +1737,7 @@ unsafe extern "C" { pub(crate) safe fn LLVMMetadataAsValue<'a>(C: &'a Context, MD: &'a Metadata) -> &'a Value; - pub(crate) fn LLVMSetUnnamedAddress(Global: &Value, UnnamedAddr: UnnamedAddr); + pub(crate) safe fn LLVMSetUnnamedAddress(Global: &Value, UnnamedAddr: UnnamedAddr); pub(crate) fn LLVMIsAConstantInt(value_ref: &Value) -> Option<&ConstantInt>; @@ -1980,12 +1993,12 @@ unsafe extern "C" { pub(crate) fn LLVMRustBuildMinNum<'a>( B: &Builder<'a>, LHS: &'a Value, - LHS: &'a Value, + RHS: &'a Value, ) -> &'a Value; pub(crate) fn LLVMRustBuildMaxNum<'a>( B: &Builder<'a>, LHS: &'a Value, - LHS: &'a Value, + RHS: &'a Value, ) -> &'a Value; // Atomic Operations @@ -2044,10 +2057,6 @@ unsafe extern "C" { NumExpansionRegions: size_t, BranchRegions: *const crate::coverageinfo::ffi::BranchRegion, NumBranchRegions: size_t, - MCDCBranchRegions: *const crate::coverageinfo::ffi::MCDCBranchRegion, - NumMCDCBranchRegions: size_t, - MCDCDecisionRegions: *const crate::coverageinfo::ffi::MCDCDecisionRegion, - NumMCDCDecisionRegions: size_t, BufferOut: &RustString, ); @@ -2431,6 +2440,7 @@ unsafe extern "C" { UseEmulatedTls: bool, ArgsCstrBuff: *const c_char, ArgsCstrBuffLen: usize, + UseWasmEH: bool, ) -> *mut TargetMachine; pub(crate) fn LLVMRustDisposeTargetMachine(T: *mut TargetMachine); @@ -2562,6 +2572,7 @@ unsafe extern "C" { pub(crate) fn LLVMRustSetDataLayoutFromTargetMachine<'a>(M: &'a Module, TM: &'a TargetMachine); + pub(crate) fn LLVMRustPositionBuilderPastAllocas<'a>(B: &Builder<'a>, Fn: &'a Value); pub(crate) fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock); pub(crate) fn LLVMRustSetModulePICLevel(M: &Module); @@ -2609,13 +2620,6 @@ unsafe extern "C" { len: usize, Identifier: *const c_char, ) -> Option<&Module>; - pub(crate) fn LLVMRustGetSliceFromObjectDataByName( - data: *const u8, - len: usize, - name: *const u8, - name_len: usize, - out_len: &mut usize, - ) -> *const u8; pub(crate) fn LLVMRustLinkerNew(M: &Module) -> &mut Linker<'_>; pub(crate) fn LLVMRustLinkerAdd( diff --git a/compiler/rustc_codegen_llvm/src/llvm/mod.rs b/compiler/rustc_codegen_llvm/src/llvm/mod.rs index 661174a80df..154ba4fd690 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/mod.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/mod.rs @@ -211,16 +211,14 @@ pub(crate) fn SetFunctionCallConv(fn_: &Value, cc: CallConv) { // function. // For more details on COMDAT sections see e.g., https://www.airs.com/blog/archives/52 pub(crate) fn SetUniqueComdat(llmod: &Module, val: &Value) { - let name_buf = get_value_name(val).to_vec(); + let name_buf = get_value_name(val); let name = CString::from_vec_with_nul(name_buf).or_else(|buf| CString::new(buf.into_bytes())).unwrap(); set_comdat(llmod, val, &name); } -pub(crate) fn SetUnnamedAddress(global: &Value, unnamed: UnnamedAddr) { - unsafe { - LLVMSetUnnamedAddress(global, unnamed); - } +pub(crate) fn set_unnamed_address(global: &Value, unnamed: UnnamedAddr) { + LLVMSetUnnamedAddress(global, unnamed); } pub(crate) fn set_thread_local_mode(global: &Value, mode: ThreadLocalMode) { @@ -260,9 +258,7 @@ pub(crate) fn set_initializer(llglobal: &Value, constant_val: &Value) { } pub(crate) fn set_global_constant(llglobal: &Value, is_constant: bool) { - unsafe { - LLVMSetGlobalConstant(llglobal, if is_constant { ffi::True } else { ffi::False }); - } + LLVMSetGlobalConstant(llglobal, if is_constant { ffi::True } else { ffi::False }); } pub(crate) fn get_linkage(llglobal: &Value) -> Linkage { @@ -319,12 +315,14 @@ pub(crate) fn get_param(llfn: &Value, index: c_uint) -> &Value { } } -/// Safe wrapper for `LLVMGetValueName2` into a byte slice -pub(crate) fn get_value_name(value: &Value) -> &[u8] { +/// Safe wrapper for `LLVMGetValueName2` +/// Needs to allocate the value, because `set_value_name` will invalidate +/// the pointer. +pub(crate) fn get_value_name(value: &Value) -> Vec<u8> { unsafe { let mut len = 0; let data = LLVMGetValueName2(value, &mut len); - std::slice::from_raw_parts(data.cast(), len) + std::slice::from_raw_parts(data.cast(), len).to_vec() } } diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index 0e77bc43df8..28d2100f478 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -6,27 +6,20 @@ use std::sync::Once; use std::{ptr, slice, str}; use libc::c_int; -use rustc_codegen_ssa::TargetConfig; use rustc_codegen_ssa::base::wants_wasm_eh; -use rustc_codegen_ssa::codegen_attrs::check_tied_features; -use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_codegen_ssa::target_features::cfg_target_feature; +use rustc_codegen_ssa::{TargetConfig, target_features}; +use rustc_data_structures::fx::FxHashSet; use rustc_data_structures::small_c_str::SmallCStr; -use rustc_data_structures::unord::UnordSet; use rustc_fs_util::path_to_c_string; use rustc_middle::bug; use rustc_session::Session; use rustc_session::config::{PrintKind, PrintRequest}; -use rustc_session::features::{StabilityExt, retpoline_features_by_flags}; -use rustc_span::Symbol; use rustc_target::spec::{MergeFunctions, PanicStrategy, SmallDataThresholdSupport}; -use rustc_target::target_features::{RUSTC_SPECIAL_FEATURES, RUSTC_SPECIFIC_FEATURES}; use smallvec::{SmallVec, smallvec}; use crate::back::write::create_informational_target_machine; -use crate::errors::{ - FixedX18InvalidArch, PossibleFeature, UnknownCTargetFeature, UnknownCTargetFeaturePrefix, -}; -use crate::llvm; +use crate::{errors, llvm}; static INIT: Once = Once::new(); @@ -195,15 +188,6 @@ impl<'a> LLVMFeature<'a> { ) -> Self { Self { llvm_feature_name, dependencies } } - - fn contains(&'a self, feat: &str) -> bool { - self.iter().any(|dep| dep == feat) - } - - fn iter(&'a self) -> impl Iterator<Item = &'a str> { - let dependencies = self.dependencies.iter().map(|feat| feat.as_str()); - std::iter::once(self.llvm_feature_name).chain(dependencies) - } } impl<'a> IntoIterator for LLVMFeature<'a> { @@ -216,18 +200,22 @@ impl<'a> IntoIterator for LLVMFeature<'a> { } } -// WARNING: the features after applying `to_llvm_features` must be known -// to LLVM or the feature detection code will walk past the end of the feature -// array, leading to crashes. -// -// To find a list of LLVM's names, see llvm-project/llvm/lib/Target/{ARCH}/*.td -// where `{ARCH}` is the architecture name. Look for instances of `SubtargetFeature`. -// -// Check the current rustc fork of LLVM in the repo at https://github.com/rust-lang/llvm-project/. -// The commit in use can be found via the `llvm-project` submodule in -// https://github.com/rust-lang/rust/tree/master/src Though note that Rust can also be build with -// an external precompiled version of LLVM which might lead to failures if the oldest tested / -// supported LLVM version doesn't yet support the relevant intrinsics. +/// Convert a Rust feature name to an LLVM feature name. Returning `None` means the +/// feature should be skipped, usually because it is not supported by the current +/// LLVM version. +/// +/// WARNING: the features after applying `to_llvm_features` must be known +/// to LLVM or the feature detection code will walk past the end of the feature +/// array, leading to crashes. +/// +/// To find a list of LLVM's names, see llvm-project/llvm/lib/Target/{ARCH}/*.td +/// where `{ARCH}` is the architecture name. Look for instances of `SubtargetFeature`. +/// +/// Check the current rustc fork of LLVM in the repo at +/// <https://github.com/rust-lang/llvm-project/>. The commit in use can be found via the +/// `llvm-project` submodule in <https://github.com/rust-lang/rust/tree/master/src> Though note that +/// Rust can also be build with an external precompiled version of LLVM which might lead to failures +/// if the oldest tested / supported LLVM version doesn't yet support the relevant intrinsics. pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFeature<'a>> { let arch = if sess.target.arch == "x86_64" { "x86" @@ -274,6 +262,15 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea // Filter out features that are not supported by the current LLVM version ("aarch64", "fpmr") => None, // only existed in 18 ("arm", "fp16") => Some(LLVMFeature::new("fullfp16")), + // NVPTX targets added in LLVM 20 + ("nvptx64", "sm_100") if get_version().0 < 20 => None, + ("nvptx64", "sm_100a") if get_version().0 < 20 => None, + ("nvptx64", "sm_101") if get_version().0 < 20 => None, + ("nvptx64", "sm_101a") if get_version().0 < 20 => None, + ("nvptx64", "sm_120") if get_version().0 < 20 => None, + ("nvptx64", "sm_120a") if get_version().0 < 20 => None, + ("nvptx64", "ptx86") if get_version().0 < 20 => None, + ("nvptx64", "ptx87") if get_version().0 < 20 => None, // Filter out features that are not supported by the current LLVM version ("loongarch64", "div32" | "lam-bh" | "lamcas" | "ld-seq-sa" | "scq") if get_version().0 < 20 => @@ -336,105 +333,29 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea /// /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen. pub(crate) fn target_config(sess: &Session) -> TargetConfig { - // Add base features for the target. - // We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below. - // The reason is that if LLVM considers a feature implied but we do not, we don't want that to - // show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of - // the target CPU, that is still expanded to target features (with all their implied features) - // by LLVM. let target_machine = create_informational_target_machine(sess, true); - // Compute which of the known target features are enabled in the 'base' target machine. We only - // consider "supported" features; "forbidden" features are not reflected in `cfg` as of now. - let mut features: FxHashSet<Symbol> = sess - .target - .rust_target_features() - .iter() - .filter(|(feature, _, _)| { - // skip checking special features, as LLVM may not understand them - if RUSTC_SPECIAL_FEATURES.contains(feature) { - return true; - } - if let Some(feat) = to_llvm_features(sess, feature) { - for llvm_feature in feat { - let cstr = SmallCStr::new(llvm_feature); - // `LLVMRustHasFeature` is moderately expensive. On targets with many - // features (e.g. x86) these calls take a non-trivial fraction of runtime - // when compiling very small programs. - if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } { - return false; - } + + let (unstable_target_features, target_features) = cfg_target_feature(sess, |feature| { + // This closure determines whether the target CPU has the feature according to LLVM. We do + // *not* consider the `-Ctarget-feature`s here, as that will be handled later in + // `cfg_target_feature`. + if let Some(feat) = to_llvm_features(sess, feature) { + // All the LLVM features this expands to must be enabled. + for llvm_feature in feat { + let cstr = SmallCStr::new(llvm_feature); + // `LLVMRustHasFeature` is moderately expensive. On targets with many + // features (e.g. x86) these calls take a non-trivial fraction of runtime + // when compiling very small programs. + if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } { + return false; } - true - } else { - false } - }) - .map(|(feature, _, _)| Symbol::intern(feature)) - .collect(); - - // Add enabled and remove disabled features. - for (enabled, feature) in - sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() { - Some('+') => Some((true, Symbol::intern(&s[1..]))), - Some('-') => Some((false, Symbol::intern(&s[1..]))), - _ => None, - }) - { - if enabled { - // Also add all transitively implied features. - - // We don't care about the order in `features` since the only thing we use it for is the - // `features.contains` below. - #[allow(rustc::potential_query_instability)] - features.extend( - sess.target - .implied_target_features(feature.as_str()) - .iter() - .map(|s| Symbol::intern(s)), - ); + true } else { - // Remove transitively reverse-implied features. - - // We don't care about the order in `features` since the only thing we use it for is the - // `features.contains` below. - #[allow(rustc::potential_query_instability)] - features.retain(|f| { - if sess.target.implied_target_features(f.as_str()).contains(&feature.as_str()) { - // If `f` if implies `feature`, then `!feature` implies `!f`, so we have to - // remove `f`. (This is the standard logical contraposition principle.) - false - } else { - // We can keep `f`. - true - } - }); + false } - } + }); - // Filter enabled features based on feature gates. - let f = |allow_unstable| { - sess.target - .rust_target_features() - .iter() - .filter_map(|(feature, gate, _)| { - // The `allow_unstable` set is used by rustc internally to determined which target - // features are truly available, so we want to return even perma-unstable - // "forbidden" features. - if allow_unstable - || (gate.in_cfg() - && (sess.is_nightly_build() || gate.requires_nightly().is_none())) - { - Some(Symbol::intern(feature)) - } else { - None - } - }) - .filter(|feature| features.contains(&feature)) - .collect() - }; - - let target_features = f(false); - let unstable_target_features = f(true); let mut cfg = TargetConfig { target_features, unstable_target_features, @@ -455,17 +376,26 @@ fn update_target_reliable_float_cfg(sess: &Session, cfg: &mut TargetConfig) { let target_env = sess.target.options.env.as_ref(); let target_abi = sess.target.options.abi.as_ref(); let target_pointer_width = sess.target.pointer_width; + let version = get_version(); + let lt_20_1_1 = version < (20, 1, 1); + let lt_21_0_0 = version < (21, 0, 0); cfg.has_reliable_f16 = match (target_arch, target_os) { - // Selection failure <https://github.com/llvm/llvm-project/issues/50374> - ("s390x", _) => false, + // LLVM crash without neon <https://github.com/llvm/llvm-project/issues/129394> (fixed in llvm20) + ("aarch64", _) + if !cfg.target_features.iter().any(|f| f.as_str() == "neon") && lt_20_1_1 => + { + false + } // Unsupported <https://github.com/llvm/llvm-project/issues/94434> ("arm64ec", _) => false, + // Selection failure <https://github.com/llvm/llvm-project/issues/50374> (fixed in llvm21) + ("s390x", _) if lt_21_0_0 => false, // MinGW ABI bugs <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054> ("x86_64", "windows") if target_env == "gnu" && target_abi != "llvm" => false, // Infinite recursion <https://github.com/llvm/llvm-project/issues/97981> ("csky", _) => false, - ("hexagon", _) => false, + ("hexagon", _) if lt_21_0_0 => false, // (fixed in llvm21) ("powerpc" | "powerpc64", _) => false, ("sparc" | "sparc64", _) => false, ("wasm32" | "wasm64", _) => false, @@ -478,18 +408,21 @@ fn update_target_reliable_float_cfg(sess: &Session, cfg: &mut TargetConfig) { cfg.has_reliable_f128 = match (target_arch, target_os) { // Unsupported <https://github.com/llvm/llvm-project/issues/94434> ("arm64ec", _) => false, - // Selection bug <https://github.com/llvm/llvm-project/issues/96432> - ("mips64" | "mips64r6", _) => false, - // Selection bug <https://github.com/llvm/llvm-project/issues/95471> + // Selection bug <https://github.com/llvm/llvm-project/issues/96432> (fixed in llvm20) + ("mips64" | "mips64r6", _) if lt_20_1_1 => false, + // Selection bug <https://github.com/llvm/llvm-project/issues/95471>. This issue is closed + // but basic math still does not work. ("nvptx64", _) => false, + // Unsupported https://github.com/llvm/llvm-project/issues/121122 + ("amdgpu", _) => false, // ABI bugs <https://github.com/rust-lang/rust/issues/125109> et al. (full // list at <https://github.com/rust-lang/rust/issues/116909>) ("powerpc" | "powerpc64", _) => false, // ABI unsupported <https://github.com/llvm/llvm-project/issues/41838> ("sparc", _) => false, // Stack alignment bug <https://github.com/llvm/llvm-project/issues/77401>. NB: tests may - // not fail if our compiler-builtins is linked. - ("x86", _) => false, + // not fail if our compiler-builtins is linked. (fixed in llvm21) + ("x86", _) if lt_21_0_0 => false, // MinGW ABI bugs <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054> ("x86_64", "windows") if target_env == "gnu" && target_abi != "llvm" => false, // There are no known problems on other platforms, so the only requirement is that symbols @@ -510,6 +443,9 @@ fn update_target_reliable_float_cfg(sess: &Session, cfg: &mut TargetConfig) { // This rules out anything that doesn't have `long double` = `binary128`; <= 32 bits // (ld is `f64`), anything other than Linux (Windows and MacOS use `f64`), and `x86` // (ld is 80-bit extended precision). + // + // musl does not implement the symbols required for f128 math at all. + _ if target_env == "musl" => false, ("x86_64", _) => false, (_, "linux") if target_pointer_width == 64 => true, _ => false, @@ -707,10 +643,18 @@ pub(crate) fn target_cpu(sess: &Session) -> &str { handle_native(cpu_name) } -fn llvm_features_by_flags(sess: &Session) -> Vec<&str> { - let mut features: Vec<&str> = Vec::new(); - retpoline_features_by_flags(sess, &mut features); - features +/// The target features for compiler flags other than `-Ctarget-features`. +fn llvm_features_by_flags(sess: &Session, features: &mut Vec<String>) { + target_features::retpoline_features_by_flags(sess, features); + + // -Zfixed-x18 + if sess.opts.unstable_opts.fixed_x18 { + if sess.target.arch != "aarch64" { + sess.dcx().emit_fatal(errors::FixedX18InvalidArch { arch: &sess.target.arch }); + } else { + features.push("+reserve-x18".into()); + } + } } /// The list of LLVM features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`, @@ -777,6 +721,8 @@ pub(crate) fn global_llvm_features( .split(',') .filter(|v| !v.is_empty()) // Drop +v8plus feature introduced in LLVM 20. + // (Hard-coded target features do not go through `to_llvm_feature` since they already + // are LLVM feature names, hence we need a special case here.) .filter(|v| *v != "+v8plus" || get_version() >= (20, 0, 0)) .map(String::from), ); @@ -787,86 +733,23 @@ pub(crate) fn global_llvm_features( // -Ctarget-features if !only_base_features { - let known_features = sess.target.rust_target_features(); - // Will only be filled when `diagnostics` is set! - let mut featsmap = FxHashMap::default(); - - // Compute implied features - let mut all_rust_features = vec![]; - for feature in sess.opts.cg.target_feature.split(',').chain(llvm_features_by_flags(sess)) { - if let Some(feature) = feature.strip_prefix('+') { - all_rust_features.extend( - UnordSet::from(sess.target.implied_target_features(feature)) - .to_sorted_stable_ord() - .iter() - .map(|&&s| (true, s)), - ) - } else if let Some(feature) = feature.strip_prefix('-') { - // FIXME: Why do we not remove implied features on "-" here? - // We do the equivalent above in `target_config`. - // See <https://github.com/rust-lang/rust/issues/134792>. - all_rust_features.push((false, feature)); - } else if !feature.is_empty() { - if diagnostics { - sess.dcx().emit_warn(UnknownCTargetFeaturePrefix { feature }); - } - } - } - // Remove features that are meant for rustc, not LLVM. - all_rust_features.retain(|(_, feature)| { - // Retain if it is not a rustc feature - !RUSTC_SPECIFIC_FEATURES.contains(feature) - }); - - // Check feature validity. - if diagnostics { - for &(enable, feature) in &all_rust_features { - let feature_state = known_features.iter().find(|&&(v, _, _)| v == feature); - match feature_state { - None => { - let rust_feature = - known_features.iter().find_map(|&(rust_feature, _, _)| { - let llvm_features = to_llvm_features(sess, rust_feature)?; - if llvm_features.contains(feature) - && !llvm_features.contains(rust_feature) - { - Some(rust_feature) - } else { - None - } - }); - let unknown_feature = if let Some(rust_feature) = rust_feature { - UnknownCTargetFeature { - feature, - rust_feature: PossibleFeature::Some { rust_feature }, - } - } else { - UnknownCTargetFeature { feature, rust_feature: PossibleFeature::None } - }; - sess.dcx().emit_warn(unknown_feature); - } - Some((_, stability, _)) => { - stability.verify_feature_enabled_by_flag(sess, enable, feature); - } - } - - // FIXME(nagisa): figure out how to not allocate a full hashset here. - featsmap.insert(feature, enable); - } - } - - // Translate this into LLVM features. - let feats = all_rust_features - .iter() - .filter_map(|&(enable, feature)| { + target_features::flag_to_backend_features( + sess, + diagnostics, + |feature| { + to_llvm_features(sess, feature) + .map(|f| SmallVec::<[&str; 2]>::from_iter(f.into_iter())) + .unwrap_or_default() + }, + |feature, enable| { let enable_disable = if enable { '+' } else { '-' }; // We run through `to_llvm_features` when // passing requests down to LLVM. This means that all in-language // features also work on the command line instead of having two // different names when the LLVM name and the Rust name differ. - let llvm_feature = to_llvm_features(sess, feature)?; + let Some(llvm_feature) = to_llvm_features(sess, feature) else { return }; - Some( + features.extend( std::iter::once(format!( "{}{}", enable_disable, llvm_feature.llvm_feature_name @@ -881,27 +764,12 @@ pub(crate) fn global_llvm_features( }, )), ) - }) - .flatten(); - features.extend(feats); - - if diagnostics && let Some(f) = check_tied_features(sess, &featsmap) { - sess.dcx().emit_err(rustc_codegen_ssa::errors::TargetFeatureDisableOrEnable { - features: f, - span: None, - missing_features: None, - }); - } + }, + ); } - // -Zfixed-x18 - if sess.opts.unstable_opts.fixed_x18 { - if sess.target.arch != "aarch64" { - sess.dcx().emit_fatal(FixedX18InvalidArch { arch: &sess.target.arch }); - } else { - features.push("+reserve-x18".into()); - } - } + // We add this in the "base target" so that these show up in `sess.unstable_target_features`. + llvm_features_by_flags(sess, &mut features); features } diff --git a/compiler/rustc_codegen_llvm/src/mono_item.rs b/compiler/rustc_codegen_llvm/src/mono_item.rs index 3f38e1e191b..f9edaded60d 100644 --- a/compiler/rustc_codegen_llvm/src/mono_item.rs +++ b/compiler/rustc_codegen_llvm/src/mono_item.rs @@ -55,8 +55,8 @@ impl<'tcx> PreDefineCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> { let fn_abi = self.fn_abi_of_instance(instance, ty::List::empty()); let lldecl = self.declare_fn(symbol_name, fn_abi, Some(instance)); llvm::set_linkage(lldecl, base::linkage_to_llvm(linkage)); - let attrs = self.tcx.codegen_fn_attrs(instance.def_id()); - base::set_link_section(lldecl, attrs); + let attrs = self.tcx.codegen_instance_attrs(instance.def); + base::set_link_section(lldecl, &attrs); if (linkage == Linkage::LinkOnceODR || linkage == Linkage::WeakODR) && self.tcx.sess.target.supports_comdat() { @@ -131,8 +131,8 @@ impl CodegenCx<'_, '_> { } // Thread-local variables generally don't support copy relocations. - let is_thread_local_var = unsafe { llvm::LLVMIsAGlobalVariable(llval) } - .is_some_and(|v| unsafe { llvm::LLVMIsThreadLocal(v) } == llvm::True); + let is_thread_local_var = llvm::LLVMIsAGlobalVariable(llval) + .is_some_and(|v| llvm::LLVMIsThreadLocal(v) == llvm::True); if is_thread_local_var { return false; } diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs index 453eca2bbe1..89365503138 100644 --- a/compiler/rustc_codegen_llvm/src/type_.rs +++ b/compiler/rustc_codegen_llvm/src/type_.rs @@ -2,7 +2,7 @@ use std::borrow::Borrow; use std::hash::{Hash, Hasher}; use std::{fmt, ptr}; -use libc::{c_char, c_uint}; +use libc::c_uint; use rustc_abi::{AddressSpace, Align, Integer, Reg, Size}; use rustc_codegen_ssa::common::TypeKind; use rustc_codegen_ssa::traits::*; @@ -208,7 +208,7 @@ impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> { } fn type_ptr(&self) -> &'ll Type { - self.type_ptr_ext(AddressSpace::DATA) + self.type_ptr_ext(AddressSpace::ZERO) } fn type_ptr_ext(&self, address_space: AddressSpace) -> &'ll Type { @@ -258,7 +258,7 @@ impl Type { } pub(crate) fn ptr_llcx(llcx: &llvm::Context) -> &Type { - unsafe { llvm::LLVMPointerTypeInContext(llcx, AddressSpace::DATA.0) } + unsafe { llvm::LLVMPointerTypeInContext(llcx, AddressSpace::ZERO.0) } } } @@ -298,8 +298,8 @@ impl<'ll, 'tcx> LayoutTypeCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { } impl<'ll, 'tcx> TypeMembershipCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { - fn add_type_metadata(&self, function: &'ll Value, typeid: String) { - let typeid_metadata = self.typeid_metadata(typeid).unwrap(); + fn add_type_metadata(&self, function: &'ll Value, typeid: &[u8]) { + let typeid_metadata = self.create_metadata(typeid); unsafe { let v = [llvm::LLVMValueAsMetadata(self.const_usize(0)), typeid_metadata]; llvm::LLVMRustGlobalAddMetadata( @@ -310,8 +310,8 @@ impl<'ll, 'tcx> TypeMembershipCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { } } - fn set_type_metadata(&self, function: &'ll Value, typeid: String) { - let typeid_metadata = self.typeid_metadata(typeid).unwrap(); + fn set_type_metadata(&self, function: &'ll Value, typeid: &[u8]) { + let typeid_metadata = self.create_metadata(typeid); unsafe { let v = [llvm::LLVMValueAsMetadata(self.const_usize(0)), typeid_metadata]; llvm::LLVMGlobalSetMetadata( @@ -322,10 +322,8 @@ impl<'ll, 'tcx> TypeMembershipCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { } } - fn typeid_metadata(&self, typeid: String) -> Option<&'ll Metadata> { - Some(unsafe { - llvm::LLVMMDStringInContext2(self.llcx, typeid.as_ptr() as *const c_char, typeid.len()) - }) + fn typeid_metadata(&self, typeid: &[u8]) -> Option<&'ll Metadata> { + Some(self.create_metadata(typeid)) } fn add_kcfi_type_metadata(&self, function: &'ll Value, kcfi_typeid: u32) { diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs index 236568590be..ce079f3cb0a 100644 --- a/compiler/rustc_codegen_llvm/src/va_arg.rs +++ b/compiler/rustc_codegen_llvm/src/va_arg.rs @@ -45,7 +45,8 @@ fn emit_direct_ptr_va_arg<'ll, 'tcx>( let va_list_ty = bx.type_ptr(); let va_list_addr = list.immediate(); - let ptr = bx.load(va_list_ty, va_list_addr, bx.tcx().data_layout.pointer_align.abi); + let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi; + let ptr = bx.load(va_list_ty, va_list_addr, ptr_align_abi); let (addr, addr_align) = if allow_higher_align && align > slot_size { (round_pointer_up_to_alignment(bx, ptr, align, bx.type_ptr()), align) @@ -56,7 +57,7 @@ fn emit_direct_ptr_va_arg<'ll, 'tcx>( let aligned_size = size.align_to(slot_size).bytes() as i32; let full_direct_size = bx.cx().const_i32(aligned_size); let next = bx.inbounds_ptradd(addr, full_direct_size); - bx.store(next, va_list_addr, bx.tcx().data_layout.pointer_align.abi); + bx.store(next, va_list_addr, ptr_align_abi); if size.bytes() < slot_size.bytes() && bx.tcx().sess.target.endian == Endian::Big @@ -108,8 +109,8 @@ fn emit_ptr_va_arg<'ll, 'tcx>( let (llty, size, align) = if indirect { ( bx.cx.layout_of(Ty::new_imm_ptr(bx.cx.tcx, target_ty)).llvm_type(bx.cx), - bx.cx.data_layout().pointer_size, - bx.cx.data_layout().pointer_align, + bx.cx.data_layout().pointer_size(), + bx.cx.data_layout().pointer_align(), ) } else { (layout.llvm_type(bx.cx), layout.size, layout.align) @@ -172,10 +173,10 @@ fn emit_aapcs_va_arg<'ll, 'tcx>( let gr_type = target_ty.is_any_ptr() || target_ty.is_integral(); let (reg_off, reg_top, slot_size) = if gr_type { - let nreg = (layout.size.bytes() + 7) / 8; + let nreg = layout.size.bytes().div_ceil(8); (gr_offs, gr_top, nreg * 8) } else { - let nreg = (layout.size.bytes() + 15) / 16; + let nreg = layout.size.bytes().div_ceil(16); (vr_offs, vr_top, nreg * 16) }; @@ -204,7 +205,7 @@ fn emit_aapcs_va_arg<'ll, 'tcx>( bx.switch_to_block(in_reg); let top_type = bx.type_ptr(); - let top = bx.load(top_type, reg_top, dl.pointer_align.abi); + let top = bx.load(top_type, reg_top, dl.pointer_align().abi); // reg_value = *(@top + reg_off_v); let mut reg_addr = bx.ptradd(top, reg_off_v); @@ -297,6 +298,7 @@ fn emit_powerpc_va_arg<'ll, 'tcx>( let max_regs = 8u8; let use_regs = bx.icmp(IntPredicate::IntULT, num_regs, bx.const_u8(max_regs)); + let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi; let in_reg = bx.append_sibling_block("va_arg.in_reg"); let in_mem = bx.append_sibling_block("va_arg.in_mem"); @@ -308,7 +310,7 @@ fn emit_powerpc_va_arg<'ll, 'tcx>( bx.switch_to_block(in_reg); let reg_safe_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2 + 4)); - let mut reg_addr = bx.load(bx.type_ptr(), reg_safe_area_ptr, dl.pointer_align.abi); + let mut reg_addr = bx.load(bx.type_ptr(), reg_safe_area_ptr, ptr_align_abi); // Floating-point registers start after the general-purpose registers. if !is_int && !is_soft_float_abi { @@ -342,11 +344,11 @@ fn emit_powerpc_va_arg<'ll, 'tcx>( let size = if !is_indirect { layout.layout.size.align_to(overflow_area_align) } else { - dl.pointer_size + dl.pointer_size() }; let overflow_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(1 + 1 + 2)); - let mut overflow_area = bx.load(bx.type_ptr(), overflow_area_ptr, dl.pointer_align.abi); + let mut overflow_area = bx.load(bx.type_ptr(), overflow_area_ptr, ptr_align_abi); // Round up address of argument to alignment if layout.layout.align.abi > overflow_area_align { @@ -362,7 +364,7 @@ fn emit_powerpc_va_arg<'ll, 'tcx>( // Increase the overflow area. overflow_area = bx.inbounds_ptradd(overflow_area, bx.const_usize(size.bytes())); - bx.store(overflow_area, overflow_area_ptr, dl.pointer_align.abi); + bx.store(overflow_area, overflow_area_ptr, ptr_align_abi); bx.br(end); @@ -373,11 +375,8 @@ fn emit_powerpc_va_arg<'ll, 'tcx>( bx.switch_to_block(end); let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]); let val_type = layout.llvm_type(bx); - let val_addr = if is_indirect { - bx.load(bx.cx.type_ptr(), val_addr, dl.pointer_align.abi) - } else { - val_addr - }; + let val_addr = + if is_indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr }; bx.load(val_type, val_addr, layout.align.abi) } @@ -414,6 +413,7 @@ fn emit_s390x_va_arg<'ll, 'tcx>( let in_reg = bx.append_sibling_block("va_arg.in_reg"); let in_mem = bx.append_sibling_block("va_arg.in_mem"); let end = bx.append_sibling_block("va_arg.end"); + let ptr_align_abi = dl.pointer_align().abi; // FIXME: vector ABI not yet supported. let target_ty_size = bx.cx.size_of(target_ty).bytes(); @@ -435,7 +435,7 @@ fn emit_s390x_va_arg<'ll, 'tcx>( bx.switch_to_block(in_reg); // Work out the address of the value in the register save area. - let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, dl.pointer_align.abi); + let reg_ptr_v = bx.load(bx.type_ptr(), reg_save_area, ptr_align_abi); let scaled_reg_count = bx.mul(reg_count_v, bx.const_u64(8)); let reg_off = bx.add(scaled_reg_count, bx.const_u64(reg_save_index * 8 + reg_padding)); let reg_addr = bx.ptradd(reg_ptr_v, reg_off); @@ -449,15 +449,14 @@ fn emit_s390x_va_arg<'ll, 'tcx>( bx.switch_to_block(in_mem); // Work out the address of the value in the argument overflow area. - let arg_ptr_v = - bx.load(bx.type_ptr(), overflow_arg_area, bx.tcx().data_layout.pointer_align.abi); + let arg_ptr_v = bx.load(bx.type_ptr(), overflow_arg_area, ptr_align_abi); let arg_off = bx.const_u64(padding); let mem_addr = bx.ptradd(arg_ptr_v, arg_off); // Update the argument overflow area pointer. let arg_size = bx.cx().const_u64(padded_size); let new_arg_ptr_v = bx.inbounds_ptradd(arg_ptr_v, arg_size); - bx.store(new_arg_ptr_v, overflow_arg_area, dl.pointer_align.abi); + bx.store(new_arg_ptr_v, overflow_arg_area, ptr_align_abi); bx.br(end); // Return the appropriate result. @@ -465,7 +464,7 @@ fn emit_s390x_va_arg<'ll, 'tcx>( let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]); let val_type = layout.llvm_type(bx); let val_addr = - if indirect { bx.load(bx.cx.type_ptr(), val_addr, dl.pointer_align.abi) } else { val_addr }; + if indirect { bx.load(bx.cx.type_ptr(), val_addr, ptr_align_abi) } else { val_addr }; bx.load(val_type, val_addr, layout.align.abi) } @@ -607,7 +606,7 @@ fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>( // loads than necessary. Can we clean this up? let reg_save_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset)); - let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi); + let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align().abi); let reg_addr = match layout.layout.backend_repr() { BackendRepr::Scalar(scalar) => match scalar.primitive() { @@ -749,10 +748,11 @@ fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>( layout: TyAndLayout<'tcx, Ty<'tcx>>, ) -> &'ll Value { let dl = bx.cx.data_layout(); + let ptr_align_abi = dl.data_layout().pointer_align().abi; let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8)); - let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi); + let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, ptr_align_abi); // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 // byte boundary if alignment needed by type exceeds 8 byte boundary. // It isn't stated explicitly in the standard, but in practice we use @@ -771,7 +771,7 @@ fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>( let size_in_bytes = layout.layout.size().bytes(); let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32); let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset); - bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi); + bx.store(overflow_arg_area, overflow_arg_area_ptr, ptr_align_abi); mem_addr } @@ -803,6 +803,7 @@ fn emit_xtensa_va_arg<'ll, 'tcx>( let from_stack = bx.append_sibling_block("va_arg.from_stack"); let from_regsave = bx.append_sibling_block("va_arg.from_regsave"); let end = bx.append_sibling_block("va_arg.end"); + let ptr_align_abi = bx.tcx().data_layout.pointer_align().abi; // (*va).va_ndx let va_reg_offset = 4; @@ -825,12 +826,11 @@ fn emit_xtensa_va_arg<'ll, 'tcx>( bx.switch_to_block(from_regsave); // update va_ndx - bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi); + bx.store(offset_next, offset_ptr, ptr_align_abi); // (*va).va_reg let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset)); - let regsave_area = - bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi); + let regsave_area = bx.load(bx.type_ptr(), regsave_area_ptr, ptr_align_abi); let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset); bx.br(end); @@ -849,11 +849,11 @@ fn emit_xtensa_va_arg<'ll, 'tcx>( // va_ndx = offset_next_corrected; let offset_next_corrected = bx.add(offset_next, bx.const_i32(slot_size)); // update va_ndx - bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi); + bx.store(offset_next_corrected, offset_ptr, ptr_align_abi); // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) }; let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0)); - let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi); + let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, ptr_align_abi); let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected); bx.br(end); @@ -861,7 +861,7 @@ fn emit_xtensa_va_arg<'ll, 'tcx>( // On big-endian, for values smaller than the slot size we'd have to align the read to the end // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa - // targets supported by rustc are litte-endian so don't worry about it. + // targets supported by rustc are little-endian so don't worry about it. // if from_regsave { // unsafe { *regsave_value_ptr } |
