Diffstat (limited to 'compiler/rustc_codegen_llvm')
24 files changed, 736 insertions, 276 deletions
diff --git a/compiler/rustc_codegen_llvm/Cargo.toml b/compiler/rustc_codegen_llvm/Cargo.toml index 5ab22f8fc4d..2d11628250c 100644 --- a/compiler/rustc_codegen_llvm/Cargo.toml +++ b/compiler/rustc_codegen_llvm/Cargo.toml @@ -19,7 +19,6 @@ object = { version = "0.37.0", default-features = false, features = ["std", "rea rustc-demangle = "0.1.21" rustc_abi = { path = "../rustc_abi" } rustc_ast = { path = "../rustc_ast" } -rustc_attr_data_structures = { path = "../rustc_attr_data_structures" } rustc_codegen_ssa = { path = "../rustc_codegen_ssa" } rustc_data_structures = { path = "../rustc_data_structures" } rustc_errors = { path = "../rustc_errors" } @@ -38,11 +37,14 @@ rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } rustc_symbol_mangling = { path = "../rustc_symbol_mangling" } rustc_target = { path = "../rustc_target" } -serde = { version = "1", features = [ "derive" ]} +serde = { version = "1", features = ["derive"] } serde_json = "1" smallvec = { version = "1.8.1", features = ["union", "may_dangle"] } tracing = "0.1" # tidy-alphabetical-end [features] +# tidy-alphabetical-start check_only = ["rustc_llvm/check_only"] +# tidy-alphabetical-end + diff --git a/compiler/rustc_codegen_llvm/messages.ftl b/compiler/rustc_codegen_llvm/messages.ftl index f197ea74473..ce9a51b539d 100644 --- a/compiler/rustc_codegen_llvm/messages.ftl +++ b/compiler/rustc_codegen_llvm/messages.ftl @@ -2,10 +2,6 @@ codegen_llvm_autodiff_without_enable = using the autodiff feature requires -Z au codegen_llvm_copy_bitcode = failed to copy bitcode to object file: {$err} -codegen_llvm_dynamic_linking_with_lto = - cannot prefer dynamic linking when performing LTO - .note = only 'staticlib', 'bin', and 'cdylib' outputs are supported with LTO - codegen_llvm_fixed_x18_invalid_arch = the `-Zfixed-x18` flag is not supported on the `{$arch}` architecture @@ -16,13 +12,7 @@ codegen_llvm_from_llvm_optimization_diag = {$filename}:{$line}:{$column} {$pass_ codegen_llvm_load_bitcode = failed to load bitcode of module "{$name}" codegen_llvm_load_bitcode_with_llvm_err = failed to load bitcode of module "{$name}": {$llvm_err} -codegen_llvm_lto_bitcode_from_rlib = failed to get bitcode from object file for LTO ({$llvm_err}) - -codegen_llvm_lto_disallowed = lto can only be run for executables, cdylibs and static library outputs - -codegen_llvm_lto_dylib = lto cannot be used for `dylib` crate type without `-Zdylib-lto` - -codegen_llvm_lto_proc_macro = lto cannot be used for `proc-macro` crate type without `-Zdylib-lto` +codegen_llvm_lto_bitcode_from_rlib = failed to get bitcode from object file for LTO ({$err}) codegen_llvm_mismatch_data_layout = data-layout for target `{$rustc_target}`, `{$rustc_layout}`, differs from LLVM target's `{$llvm_target}` default layout, `{$llvm_layout}` diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index c32f11b27f3..c548f467583 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -1,6 +1,6 @@ //! Set and unset common attributes on LLVM values. 
-use rustc_attr_data_structures::{InlineAttr, InstructionSetAttr, OptimizeAttr}; use rustc_codegen_ssa::traits::*; +use rustc_hir::attrs::{InlineAttr, InstructionSetAttr, OptimizeAttr}; use rustc_hir::def_id::DefId; use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, PatchableFunctionEntry}; use rustc_middle::ty::{self, TyCtxt}; diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 655e1c95373..c269f11e931 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -1,33 +1,29 @@ use std::collections::BTreeMap; use std::ffi::{CStr, CString}; use std::fs::File; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::ptr::NonNull; use std::sync::Arc; use std::{io, iter, slice}; use object::read::archive::ArchiveFile; +use object::{Object, ObjectSection}; use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared}; -use rustc_codegen_ssa::back::symbol_export; use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput}; use rustc_codegen_ssa::traits::*; use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file}; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::memmap::Mmap; use rustc_errors::{DiagCtxtHandle, FatalError}; -use rustc_hir::def_id::LOCAL_CRATE; use rustc_middle::bug; use rustc_middle::dep_graph::WorkProduct; -use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel}; -use rustc_session::config::{self, CrateType, Lto}; +use rustc_session::config::{self, Lto}; use tracing::{debug, info}; use crate::back::write::{ self, CodegenDiagnosticsStage, DiagnosticHandlers, bitcode_section_name, save_temp_bitcode, }; -use crate::errors::{ - DynamicLinkingWithLTO, LlvmError, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib, LtoProcMacro, -}; +use crate::errors::{LlvmError, LtoBitcodeFromRlib}; use crate::llvm::AttributePlace::Function; use crate::llvm::{self, build_string}; use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx, attributes}; @@ -36,45 +32,21 @@ use crate::{LlvmCodegenBackend, ModuleLlvm, SimpleCx, attributes}; /// session to determine which CGUs we can reuse. 
const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin"; -fn crate_type_allows_lto(crate_type: CrateType) -> bool { - match crate_type { - CrateType::Executable - | CrateType::Dylib - | CrateType::Staticlib - | CrateType::Cdylib - | CrateType::ProcMacro - | CrateType::Sdylib => true, - CrateType::Rlib => false, - } -} - fn prepare_lto( cgcx: &CodegenContext<LlvmCodegenBackend>, + exported_symbols_for_lto: &[String], + each_linked_rlib_for_lto: &[PathBuf], dcx: DiagCtxtHandle<'_>, ) -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError> { - let export_threshold = match cgcx.lto { - // We're just doing LTO for our one crate - Lto::ThinLocal => SymbolExportLevel::Rust, - - // We're doing LTO for the entire crate graph - Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types), - - Lto::No => panic!("didn't request LTO but we're doing LTO"), - }; + let mut symbols_below_threshold = exported_symbols_for_lto + .iter() + .map(|symbol| CString::new(symbol.to_owned()).unwrap()) + .collect::<Vec<CString>>(); - let symbol_filter = &|&(ref name, info): &(String, SymbolExportInfo)| { - if info.level.is_below_threshold(export_threshold) || info.used { - Some(CString::new(name.as_str()).unwrap()) - } else { - None - } - }; - let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO"); - let mut symbols_below_threshold = { - let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold"); - exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::<Vec<CString>>() - }; - info!("{} symbols to preserve in this crate", symbols_below_threshold.len()); + // __llvm_profile_counter_bias is pulled in at link time by an undefined reference to + // __llvm_profile_runtime, therefore we won't know until link time if this symbol + // should have default visibility. 
+ symbols_below_threshold.push(c"__llvm_profile_counter_bias".to_owned()); // If we're performing LTO for the entire crate graph, then for each of our // upstream dependencies, find the corresponding rlib and load the bitcode @@ -84,37 +56,7 @@ fn prepare_lto( // with either fat or thin LTO let mut upstream_modules = Vec::new(); if cgcx.lto != Lto::ThinLocal { - // Make sure we actually can run LTO - for crate_type in cgcx.crate_types.iter() { - if !crate_type_allows_lto(*crate_type) { - dcx.emit_err(LtoDisallowed); - return Err(FatalError); - } else if *crate_type == CrateType::Dylib { - if !cgcx.opts.unstable_opts.dylib_lto { - dcx.emit_err(LtoDylib); - return Err(FatalError); - } - } else if *crate_type == CrateType::ProcMacro && !cgcx.opts.unstable_opts.dylib_lto { - dcx.emit_err(LtoProcMacro); - return Err(FatalError); - } - } - - if cgcx.opts.cg.prefer_dynamic && !cgcx.opts.unstable_opts.dylib_lto { - dcx.emit_err(DynamicLinkingWithLTO); - return Err(FatalError); - } - - for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() { - let exported_symbols = - cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO"); - { - let _timer = - cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold"); - symbols_below_threshold - .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter)); - } - + for path in each_linked_rlib_for_lto { let archive_data = unsafe { Mmap::map(std::fs::File::open(&path).expect("couldn't open rlib")) .expect("couldn't map rlib") @@ -147,10 +89,6 @@ fn prepare_lto( } } - // __llvm_profile_counter_bias is pulled in at link time by an undefined reference to - // __llvm_profile_runtime, therefore we won't know until link time if this symbol - // should have default visibility. - symbols_below_threshold.push(c"__llvm_profile_counter_bias".to_owned()); Ok((symbols_below_threshold, upstream_modules)) } @@ -168,46 +106,32 @@ fn get_bitcode_slice_from_object_data<'a>( // name" which in the public API for sections gets treated as part of the section name, but // internally in MachOObjectFile.cpp gets treated separately. let section_name = bitcode_section_name(cgcx).to_str().unwrap().trim_start_matches("__LLVM,"); - let mut len = 0; - let data = unsafe { - llvm::LLVMRustGetSliceFromObjectDataByName( - obj.as_ptr(), - obj.len(), - section_name.as_ptr(), - section_name.len(), - &mut len, - ) - }; - if !data.is_null() { - assert!(len != 0); - let bc = unsafe { slice::from_raw_parts(data, len) }; - // `bc` must be a sub-slice of `obj`. - assert!(obj.as_ptr() <= bc.as_ptr()); - assert!(bc[bc.len()..bc.len()].as_ptr() <= obj[obj.len()..obj.len()].as_ptr()); + let obj = + object::File::parse(obj).map_err(|err| LtoBitcodeFromRlib { err: err.to_string() })?; - Ok(bc) - } else { - assert!(len == 0); - Err(LtoBitcodeFromRlib { - llvm_err: llvm::last_error().unwrap_or_else(|| "unknown LLVM error".to_string()), - }) - } + let section = obj + .section_by_name(section_name) + .ok_or_else(|| LtoBitcodeFromRlib { err: format!("Can't find section {section_name}") })?; + + section.data().map_err(|err| LtoBitcodeFromRlib { err: err.to_string() }) } /// Performs fat LTO by merging all modules into a single one and returning it /// for further optimization. 
pub(crate) fn run_fat( cgcx: &CodegenContext<LlvmCodegenBackend>, + exported_symbols_for_lto: &[String], + each_linked_rlib_for_lto: &[PathBuf], modules: Vec<FatLtoInput<LlvmCodegenBackend>>, - cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, ) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> { let dcx = cgcx.create_dcx(); let dcx = dcx.handle(); - let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, dcx)?; + let (symbols_below_threshold, upstream_modules) = + prepare_lto(cgcx, exported_symbols_for_lto, each_linked_rlib_for_lto, dcx)?; let symbols_below_threshold = symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>(); - fat_lto(cgcx, dcx, modules, cached_modules, upstream_modules, &symbols_below_threshold) + fat_lto(cgcx, dcx, modules, upstream_modules, &symbols_below_threshold) } /// Performs thin LTO by performing necessary global analysis and returning two @@ -215,12 +139,15 @@ pub(crate) fn run_fat( /// can simply be copied over from the incr. comp. cache. pub(crate) fn run_thin( cgcx: &CodegenContext<LlvmCodegenBackend>, + exported_symbols_for_lto: &[String], + each_linked_rlib_for_lto: &[PathBuf], modules: Vec<(String, ThinBuffer)>, cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, ) -> Result<(Vec<ThinModule<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> { let dcx = cgcx.create_dcx(); let dcx = dcx.handle(); - let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, dcx)?; + let (symbols_below_threshold, upstream_modules) = + prepare_lto(cgcx, exported_symbols_for_lto, each_linked_rlib_for_lto, dcx)?; let symbols_below_threshold = symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>(); if cgcx.opts.cg.linker_plugin_lto.enabled() { @@ -245,7 +172,6 @@ fn fat_lto( cgcx: &CodegenContext<LlvmCodegenBackend>, dcx: DiagCtxtHandle<'_>, modules: Vec<FatLtoInput<LlvmCodegenBackend>>, - cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>, mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>, symbols_below_threshold: &[*const libc::c_char], ) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> { @@ -258,21 +184,12 @@ fn fat_lto( // modules that are serialized in-memory. // * `in_memory` contains modules which are already parsed and in-memory, // such as from multi-CGU builds. - // - // All of `cached_modules` (cached from previous incremental builds) can - // immediately go onto the `serialized_modules` modules list and then we can - // split the `modules` array into these two lists. let mut in_memory = Vec::new(); - serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| { - info!("pushing cached module {:?}", wp.cgu_name); - (buffer, CString::new(wp.cgu_name).unwrap()) - })); for module in modules { match module { FatLtoInput::InMemory(m) => in_memory.push(m), FatLtoInput::Serialized { name, buffer } => { info!("pushing serialized module {:?}", name); - let buffer = SerializedModule::Local(buffer); serialized_modules.push((buffer, CString::new(name).unwrap())); } } @@ -573,10 +490,10 @@ fn thin_lto( // Save the current ThinLTO import information for the next compilation // session, overwriting the previous serialized data (if any). 
- if let Some(path) = key_map_path { - if let Err(err) = curr_key_map.save_to_file(&path) { - return Err(write::llvm_err(dcx, LlvmError::WriteThinLtoKey { err })); - } + if let Some(path) = key_map_path + && let Err(err) = curr_key_map.save_to_file(&path) + { + return Err(write::llvm_err(dcx, LlvmError::WriteThinLtoKey { err })); } Ok((opt_jobs, copy_jobs)) @@ -654,6 +571,7 @@ pub(crate) fn run_pass_manager( // We then run the llvm_optimize function a second time, to optimize the code which we generated // in the enzyme differentiation pass. let enable_ad = config.autodiff.contains(&config::AutoDiff::Enable); + let enable_gpu = config.offload.contains(&config::Offload::Enable); let stage = if thin { write::AutodiffStage::PreAD } else { @@ -668,6 +586,12 @@ pub(crate) fn run_pass_manager( write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?; } + if enable_gpu && !thin { + let cx = + SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size); + crate::builder::gpu_offload::handle_gpu_code(cgcx, &cx); + } + if cfg!(llvm_enzyme) && enable_ad && !thin { let cx = SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size); diff --git a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs index dfde4595590..8e82013e94a 100644 --- a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs +++ b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs @@ -39,6 +39,7 @@ impl OwnedTargetMachine { debug_info_compression: &CStr, use_emulated_tls: bool, args_cstr_buff: &[u8], + use_wasm_eh: bool, ) -> Result<Self, LlvmError<'static>> { assert!(args_cstr_buff.len() > 0); assert!( @@ -72,6 +73,7 @@ impl OwnedTargetMachine { use_emulated_tls, args_cstr_buff.as_ptr() as *const c_char, args_cstr_buff.len(), + use_wasm_eh, ) }; diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index 68279008c03..85a06f457eb 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -15,6 +15,7 @@ use rustc_codegen_ssa::back::write::{ BitcodeSection, CodegenContext, EmitObj, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn, }; +use rustc_codegen_ssa::base::wants_wasm_eh; use rustc_codegen_ssa::traits::*; use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind}; use rustc_data_structures::profiling::SelfProfilerRef; @@ -285,6 +286,8 @@ pub(crate) fn target_machine_factory( let file_name_display_preference = sess.filename_display_preference(RemapPathScopeComponents::DEBUGINFO); + let use_wasm_eh = wants_wasm_eh(sess); + Arc::new(move |config: TargetMachineFactoryConfig| { let path_to_cstring_helper = |path: Option<PathBuf>| -> CString { let path = path.unwrap_or_default(); @@ -321,6 +324,7 @@ pub(crate) fn target_machine_factory( &debuginfo_compression, use_emulated_tls, &args_cstr_buff, + use_wasm_eh, ) }) } @@ -792,29 +796,6 @@ pub(crate) fn optimize( Ok(()) } -pub(crate) fn link( - cgcx: &CodegenContext<LlvmCodegenBackend>, - dcx: DiagCtxtHandle<'_>, - mut modules: Vec<ModuleCodegen<ModuleLlvm>>, -) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> { - use super::lto::{Linker, ModuleBuffer}; - // Sort the modules by name to ensure deterministic behavior. - modules.sort_by(|a, b| a.name.cmp(&b.name)); - let (first, elements) = - modules.split_first().expect("Bug! 
modules must contain at least one module."); - - let mut linker = Linker::new(first.module_llvm.llmod()); - for module in elements { - let _timer = cgcx.prof.generic_activity_with_arg("LLVM_link_module", &*module.name); - let buffer = ModuleBuffer::new(module.module_llvm.llmod()); - linker - .add(buffer.data()) - .map_err(|()| llvm_err(dcx, LlvmError::SerializeModule { name: &module.name }))?; - } - drop(linker); - Ok(modules.remove(0)) -} - pub(crate) fn codegen( cgcx: &CodegenContext<LlvmCodegenBackend>, module: ModuleCodegen<ModuleLlvm>, diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 514923ad6f3..da2a153d819 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -3,6 +3,7 @@ use std::ops::Deref; use std::{iter, ptr}; pub(crate) mod autodiff; +pub(crate) mod gpu_offload; use libc::{c_char, c_uint, size_t}; use rustc_abi as abi; @@ -14,6 +15,7 @@ use rustc_codegen_ssa::mir::place::PlaceRef; use rustc_codegen_ssa::traits::*; use rustc_data_structures::small_c_str::SmallCStr; use rustc_hir::def_id::DefId; +use rustc_middle::bug; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; use rustc_middle::ty::layout::{ FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasTypingEnv, LayoutError, LayoutOfHelpers, @@ -23,7 +25,7 @@ use rustc_middle::ty::{self, Instance, Ty, TyCtxt}; use rustc_sanitizers::{cfi, kcfi}; use rustc_session::config::OptLevel; use rustc_span::Span; -use rustc_target::callconv::FnAbi; +use rustc_target::callconv::{FnAbi, PassMode}; use rustc_target::spec::{HasTargetSpec, SanitizerSet, Target}; use smallvec::SmallVec; use tracing::{debug, instrument}; @@ -117,6 +119,74 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> { } bx } + + // The generic builder has less functionality and thus (unlike the other alloca) we can not + // easily jump to the beginning of the function to place our allocas there. We trust the user + // to manually do that. FIXME(offload): improve the genericCx and add more llvm wrappers to + // handle this. 
+ pub(crate) fn direct_alloca(&mut self, ty: &'ll Type, align: Align, name: &str) -> &'ll Value { + let val = unsafe { + let alloca = llvm::LLVMBuildAlloca(self.llbuilder, ty, UNNAMED); + llvm::LLVMSetAlignment(alloca, align.bytes() as c_uint); + // Cast to default addrspace if necessary + llvm::LLVMBuildPointerCast(self.llbuilder, alloca, self.cx.type_ptr(), UNNAMED) + }; + if name != "" { + let name = std::ffi::CString::new(name).unwrap(); + llvm::set_value_name(val, &name.as_bytes()); + } + val + } + + pub(crate) fn inbounds_gep( + &mut self, + ty: &'ll Type, + ptr: &'ll Value, + indices: &[&'ll Value], + ) -> &'ll Value { + unsafe { + llvm::LLVMBuildGEPWithNoWrapFlags( + self.llbuilder, + ty, + ptr, + indices.as_ptr(), + indices.len() as c_uint, + UNNAMED, + GEPNoWrapFlags::InBounds, + ) + } + } + + pub(crate) fn store(&mut self, val: &'ll Value, ptr: &'ll Value, align: Align) -> &'ll Value { + debug!("Store {:?} -> {:?}", val, ptr); + assert_eq!(self.cx.type_kind(self.cx.val_ty(ptr)), TypeKind::Pointer); + unsafe { + let store = llvm::LLVMBuildStore(self.llbuilder, val, ptr); + llvm::LLVMSetAlignment(store, align.bytes() as c_uint); + store + } + } + + pub(crate) fn load(&mut self, ty: &'ll Type, ptr: &'ll Value, align: Align) -> &'ll Value { + unsafe { + let load = llvm::LLVMBuildLoad2(self.llbuilder, ty, ptr, UNNAMED); + llvm::LLVMSetAlignment(load, align.bytes() as c_uint); + load + } + } + + fn memset(&mut self, ptr: &'ll Value, fill_byte: &'ll Value, size: &'ll Value, align: Align) { + unsafe { + llvm::LLVMRustBuildMemSet( + self.llbuilder, + ptr, + align.bytes() as c_uint, + fill_byte, + size, + false, + ); + } + } } /// Empty string, to be used where LLVM expects an instruction name, indicating @@ -618,10 +688,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { bx.nonnull_metadata(load); } - if let Some(pointee) = layout.pointee_info_at(bx, offset) { - if let Some(_) = pointee.safe { - bx.align_metadata(load, pointee.align); - } + if let Some(pointee) = layout.pointee_info_at(bx, offset) + && let Some(_) = pointee.safe + { + bx.align_metadata(load, pointee.align); } } abi::Primitive::Float(_) => {} @@ -1362,6 +1432,28 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { call } + fn tail_call( + &mut self, + llty: Self::Type, + fn_attrs: Option<&CodegenFnAttrs>, + fn_abi: &FnAbi<'tcx, Ty<'tcx>>, + llfn: Self::Value, + args: &[Self::Value], + funclet: Option<&Self::Funclet>, + instance: Option<Instance<'tcx>>, + ) { + let call = self.call(llty, fn_attrs, Some(fn_abi), llfn, args, funclet, instance); + llvm::LLVMRustSetTailCallKind(call, llvm::TailCallKind::MustTail); + + match &fn_abi.ret.mode { + PassMode::Ignore | PassMode::Indirect { .. } => self.ret_void(), + PassMode::Direct(_) | PassMode::Pair { .. } => self.ret(call), + mode @ PassMode::Cast { .. 
} => { + bug!("Encountered `PassMode::{mode:?}` during codegen") + } + } + } + fn zext(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value { unsafe { llvm::LLVMBuildZExt(self.llbuilder, val, dest_ty, UNNAMED) } } diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs new file mode 100644 index 00000000000..1280ab1442a --- /dev/null +++ b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs @@ -0,0 +1,439 @@ +use std::ffi::CString; + +use llvm::Linkage::*; +use rustc_abi::Align; +use rustc_codegen_ssa::back::write::CodegenContext; +use rustc_codegen_ssa::traits::BaseTypeCodegenMethods; + +use crate::builder::SBuilder; +use crate::common::AsCCharPtr; +use crate::llvm::AttributePlace::Function; +use crate::llvm::{self, Linkage, Type, Value}; +use crate::{LlvmCodegenBackend, SimpleCx, attributes}; + +pub(crate) fn handle_gpu_code<'ll>( + _cgcx: &CodegenContext<LlvmCodegenBackend>, + cx: &'ll SimpleCx<'_>, +) { + // The offload memory transfer type for each kernel + let mut o_types = vec![]; + let mut kernels = vec![]; + let offload_entry_ty = add_tgt_offload_entry(&cx); + for num in 0..9 { + let kernel = cx.get_function(&format!("kernel_{num}")); + if let Some(kernel) = kernel { + o_types.push(gen_define_handling(&cx, kernel, offload_entry_ty, num)); + kernels.push(kernel); + } + } + + gen_call_handling(&cx, &kernels, &o_types); +} + +// What is our @1 here? A magic global, used in our data_{begin/update/end}_mapper: +// @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +// @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +fn generate_at_one<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Value { + // @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 + let unknown_txt = ";unknown;unknown;0;0;;"; + let c_entry_name = CString::new(unknown_txt).unwrap(); + let c_val = c_entry_name.as_bytes_with_nul(); + let initializer = crate::common::bytes_in_context(cx.llcx, c_val); + let at_zero = add_unnamed_global(&cx, &"", initializer, PrivateLinkage); + llvm::set_alignment(at_zero, Align::ONE); + + // @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 + let struct_ident_ty = cx.type_named_struct("struct.ident_t"); + let struct_elems = vec![ + cx.get_const_i32(0), + cx.get_const_i32(2), + cx.get_const_i32(0), + cx.get_const_i32(22), + at_zero, + ]; + let struct_elems_ty: Vec<_> = struct_elems.iter().map(|&x| cx.val_ty(x)).collect(); + let initializer = crate::common::named_struct(struct_ident_ty, &struct_elems); + cx.set_struct_body(struct_ident_ty, &struct_elems_ty, false); + let at_one = add_unnamed_global(&cx, &"", initializer, PrivateLinkage); + llvm::set_alignment(at_one, Align::EIGHT); + at_one +} + +pub(crate) fn add_tgt_offload_entry<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Type { + let offload_entry_ty = cx.type_named_struct("struct.__tgt_offload_entry"); + let tptr = cx.type_ptr(); + let ti64 = cx.type_i64(); + let ti32 = cx.type_i32(); + let ti16 = cx.type_i16(); + // For each kernel to run on the gpu, we will later generate one entry of this type. 
+ // copied from LLVM + // typedef struct { + // uint64_t Reserved; + // uint16_t Version; + // uint16_t Kind; + // uint32_t Flags; Flags associated with the entry (see Target Region Entry Flags) + // void *Address; Address of global symbol within device image (function or global) + // char *SymbolName; + // uint64_t Size; Size of the entry info (0 if it is a function) + // uint64_t Data; + // void *AuxAddr; + // } __tgt_offload_entry; + let entry_elements = vec![ti64, ti16, ti16, ti32, tptr, tptr, ti64, ti64, tptr]; + cx.set_struct_body(offload_entry_ty, &entry_elements, false); + offload_entry_ty +} + +fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) { + let kernel_arguments_ty = cx.type_named_struct("struct.__tgt_kernel_arguments"); + let tptr = cx.type_ptr(); + let ti64 = cx.type_i64(); + let ti32 = cx.type_i32(); + let tarr = cx.type_array(ti32, 3); + + // Taken from the LLVM APITypes.h declaration: + //struct KernelArgsTy { + // uint32_t Version = 0; // Version of this struct for ABI compatibility. + // uint32_t NumArgs = 0; // Number of arguments in each input pointer. + // void **ArgBasePtrs = + // nullptr; // Base pointer of each argument (e.g. a struct). + // void **ArgPtrs = nullptr; // Pointer to the argument data. + // int64_t *ArgSizes = nullptr; // Size of the argument data in bytes. + // int64_t *ArgTypes = nullptr; // Type of the data (e.g. to / from). + // void **ArgNames = nullptr; // Name of the data for debugging, possibly null. + // void **ArgMappers = nullptr; // User-defined mappers, possibly null. + // uint64_t Tripcount = + // 0; // Tripcount for the teams / distribute loop, 0 otherwise. + // struct { + // uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause. + // uint64_t IsCUDA : 1; // Was this kernel spawned via CUDA. + // uint64_t Unused : 62; + // } Flags = {0, 0, 0}; + // // The number of teams (for x,y,z dimension). + // uint32_t NumTeams[3] = {0, 0, 0}; + // // The number of threads (for x,y,z dimension). + // uint32_t ThreadLimit[3] = {0, 0, 0}; + // uint32_t DynCGroupMem = 0; // Amount of dynamic cgroup memory requested. + //}; + let kernel_elements = + vec![ti32, ti32, tptr, tptr, tptr, tptr, tptr, tptr, ti64, ti64, tarr, tarr, ti32]; + + cx.set_struct_body(kernel_arguments_ty, &kernel_elements, false); + // For now we don't handle kernels, so for now we just add a global dummy + // to make sure that the __tgt_offload_entry is defined and handled correctly. 
+ cx.declare_global("my_struct_global2", kernel_arguments_ty); +} + +fn gen_tgt_data_mappers<'ll>( + cx: &'ll SimpleCx<'_>, +) -> (&'ll llvm::Value, &'ll llvm::Value, &'ll llvm::Value, &'ll llvm::Type) { + let tptr = cx.type_ptr(); + let ti64 = cx.type_i64(); + let ti32 = cx.type_i32(); + + let args = vec![tptr, ti64, ti32, tptr, tptr, tptr, tptr, tptr, tptr]; + let mapper_fn_ty = cx.type_func(&args, cx.type_void()); + let mapper_begin = "__tgt_target_data_begin_mapper"; + let mapper_update = "__tgt_target_data_update_mapper"; + let mapper_end = "__tgt_target_data_end_mapper"; + let begin_mapper_decl = declare_offload_fn(&cx, mapper_begin, mapper_fn_ty); + let update_mapper_decl = declare_offload_fn(&cx, mapper_update, mapper_fn_ty); + let end_mapper_decl = declare_offload_fn(&cx, mapper_end, mapper_fn_ty); + + let nounwind = llvm::AttributeKind::NoUnwind.create_attr(cx.llcx); + attributes::apply_to_llfn(begin_mapper_decl, Function, &[nounwind]); + attributes::apply_to_llfn(update_mapper_decl, Function, &[nounwind]); + attributes::apply_to_llfn(end_mapper_decl, Function, &[nounwind]); + + (begin_mapper_decl, update_mapper_decl, end_mapper_decl, mapper_fn_ty) +} + +fn add_priv_unnamed_arr<'ll>(cx: &SimpleCx<'ll>, name: &str, vals: &[u64]) -> &'ll llvm::Value { + let ti64 = cx.type_i64(); + let mut size_val = Vec::with_capacity(vals.len()); + for &val in vals { + size_val.push(cx.get_const_i64(val)); + } + let initializer = cx.const_array(ti64, &size_val); + add_unnamed_global(cx, name, initializer, PrivateLinkage) +} + +pub(crate) fn add_unnamed_global<'ll>( + cx: &SimpleCx<'ll>, + name: &str, + initializer: &'ll llvm::Value, + l: Linkage, +) -> &'ll llvm::Value { + let llglobal = add_global(cx, name, initializer, l); + llvm::LLVMSetUnnamedAddress(llglobal, llvm::UnnamedAddr::Global); + llglobal +} + +pub(crate) fn add_global<'ll>( + cx: &SimpleCx<'ll>, + name: &str, + initializer: &'ll llvm::Value, + l: Linkage, +) -> &'ll llvm::Value { + let c_name = CString::new(name).unwrap(); + let llglobal: &'ll llvm::Value = llvm::add_global(cx.llmod, cx.val_ty(initializer), &c_name); + llvm::set_global_constant(llglobal, true); + llvm::set_linkage(llglobal, l); + llvm::set_initializer(llglobal, initializer); + llglobal +} + +fn gen_define_handling<'ll>( + cx: &'ll SimpleCx<'_>, + kernel: &'ll llvm::Value, + offload_entry_ty: &'ll llvm::Type, + num: i64, +) -> &'ll llvm::Value { + let types = cx.func_params_types(cx.get_type_of_global(kernel)); + // It seems like non-pointer values are automatically mapped. So here, we focus on pointer (or + // reference) types. + let num_ptr_types = types + .iter() + .map(|&x| matches!(cx.type_kind(x), rustc_codegen_ssa::common::TypeKind::Pointer)) + .count(); + + // We do not know their size anymore at this level, so hardcode a placeholder. + // A follow-up pr will track these from the frontend, where we still have Rust types. + // Then, we will be able to figure out that e.g. `&[f32;256]` will result in 4*256 bytes. + // I decided that 1024 bytes is a great placeholder value for now. + add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{num}"), &vec![1024; num_ptr_types]); + // Here we figure out whether something needs to be copied to the gpu (=1), from the gpu (=2), + // or both to and from the gpu (=3). Other values shouldn't affect us for now. + // A non-mutable reference or pointer will be 1, an array that's not read, but fully overwritten + // will be 2. For now, everything is 3, until we have our frontend set up. 
+ let o_types = + add_priv_unnamed_arr(&cx, &format!(".offload_maptypes.{num}"), &vec![3; num_ptr_types]); + // Next: For each function, generate these three entries. A weak constant, + // the llvm.rodata entry name, and the omp_offloading_entries value + + let name = format!(".kernel_{num}.region_id"); + let initializer = cx.get_const_i8(0); + let region_id = add_unnamed_global(&cx, &name, initializer, WeakAnyLinkage); + + let c_entry_name = CString::new(format!("kernel_{num}")).unwrap(); + let c_val = c_entry_name.as_bytes_with_nul(); + let offload_entry_name = format!(".offloading.entry_name.{num}"); + + let initializer = crate::common::bytes_in_context(cx.llcx, c_val); + let llglobal = add_unnamed_global(&cx, &offload_entry_name, initializer, InternalLinkage); + llvm::set_alignment(llglobal, Align::ONE); + llvm::set_section(llglobal, c".llvm.rodata.offloading"); + + // Not actively used yet, for calling real kernels + let name = format!(".offloading.entry.kernel_{num}"); + + // See the __tgt_offload_entry documentation above. + let reserved = cx.get_const_i64(0); + let version = cx.get_const_i16(1); + let kind = cx.get_const_i16(1); + let flags = cx.get_const_i32(0); + let size = cx.get_const_i64(0); + let data = cx.get_const_i64(0); + let aux_addr = cx.const_null(cx.type_ptr()); + let elems = vec![reserved, version, kind, flags, region_id, llglobal, size, data, aux_addr]; + + let initializer = crate::common::named_struct(offload_entry_ty, &elems); + let c_name = CString::new(name).unwrap(); + let llglobal = llvm::add_global(cx.llmod, offload_entry_ty, &c_name); + llvm::set_global_constant(llglobal, true); + llvm::set_linkage(llglobal, WeakAnyLinkage); + llvm::set_initializer(llglobal, initializer); + llvm::set_alignment(llglobal, Align::ONE); + let c_section_name = CString::new(".omp_offloading_entries").unwrap(); + llvm::set_section(llglobal, &c_section_name); + o_types +} + +fn declare_offload_fn<'ll>( + cx: &'ll SimpleCx<'_>, + name: &str, + ty: &'ll llvm::Type, +) -> &'ll llvm::Value { + crate::declare::declare_simple_fn( + cx, + name, + llvm::CallConv::CCallConv, + llvm::UnnamedAddr::No, + llvm::Visibility::Default, + ty, + ) +} + +// For each kernel *call*, we now use some of our previous declared globals to move data to and from +// the gpu. We don't have a proper frontend yet, so we assume that every call to a kernel function +// from main is intended to run on the GPU. For now, we only handle the data transfer part of it. +// If two consecutive kernels use the same memory, we still move it to the host and back to the gpu. +// Since in our frontend users (by default) don't have to specify data transfer, this is something +// we should optimize in the future! We also assume that everything should be copied back and forth, +// but sometimes we can directly zero-allocate on the device and only move back, or if something is +// immutable, we might only copy it to the device, but not back. +// +// Current steps: +// 0. Alloca some variables for the following steps +// 1. set insert point before kernel call. +// 2. generate all the GEPS and stores, to be used in 3) +// 3. generate __tgt_target_data_begin calls to move data to the GPU +// +// unchanged: keep kernel call. Later move the kernel to the GPU +// +// 4. set insert point after kernel call. +// 5. generate all the GEPS and stores, to be used in 6) +// 6. 
generate __tgt_target_data_end calls to move data from the GPU +fn gen_call_handling<'ll>( + cx: &'ll SimpleCx<'_>, + _kernels: &[&'ll llvm::Value], + o_types: &[&'ll llvm::Value], +) { + // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } + let tptr = cx.type_ptr(); + let ti32 = cx.type_i32(); + let tgt_bin_desc_ty = vec![ti32, tptr, tptr, tptr]; + let tgt_bin_desc = cx.type_named_struct("struct.__tgt_bin_desc"); + cx.set_struct_body(tgt_bin_desc, &tgt_bin_desc_ty, false); + + gen_tgt_kernel_global(&cx); + let (begin_mapper_decl, _, end_mapper_decl, fn_ty) = gen_tgt_data_mappers(&cx); + + let main_fn = cx.get_function("main"); + let Some(main_fn) = main_fn else { return }; + let kernel_name = "kernel_1"; + let call = unsafe { + llvm::LLVMRustGetFunctionCall(main_fn, kernel_name.as_c_char_ptr(), kernel_name.len()) + }; + let Some(kernel_call) = call else { + return; + }; + let kernel_call_bb = unsafe { llvm::LLVMGetInstructionParent(kernel_call) }; + let called = unsafe { llvm::LLVMGetCalledValue(kernel_call).unwrap() }; + let mut builder = SBuilder::build(cx, kernel_call_bb); + + let types = cx.func_params_types(cx.get_type_of_global(called)); + let num_args = types.len() as u64; + + // Step 0) + // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } + // %6 = alloca %struct.__tgt_bin_desc, align 8 + unsafe { llvm::LLVMRustPositionBuilderPastAllocas(builder.llbuilder, main_fn) }; + + let tgt_bin_desc_alloca = builder.direct_alloca(tgt_bin_desc, Align::EIGHT, "EmptyDesc"); + + let ty = cx.type_array(cx.type_ptr(), num_args); + // Baseptr are just the input pointer to the kernel, stored in a local alloca + let a1 = builder.direct_alloca(ty, Align::EIGHT, ".offload_baseptrs"); + // Ptrs are the result of a gep into the baseptr, at least for our trivial types. + let a2 = builder.direct_alloca(ty, Align::EIGHT, ".offload_ptrs"); + // These represent the sizes in bytes, e.g. the entry for `&[f64; 16]` will be 8*16. + let ty2 = cx.type_array(cx.type_i64(), num_args); + let a4 = builder.direct_alloca(ty2, Align::EIGHT, ".offload_sizes"); + // Now we allocate once per function param, a copy to be passed to one of our maps. + let mut vals = vec![]; + let mut geps = vec![]; + let i32_0 = cx.get_const_i32(0); + for (index, in_ty) in types.iter().enumerate() { + // get function arg, store it into the alloca, and read it. 
+ let p = llvm::get_param(called, index as u32); + let name = llvm::get_value_name(p); + let name = str::from_utf8(&name).unwrap(); + let arg_name = format!("{name}.addr"); + let alloca = builder.direct_alloca(in_ty, Align::EIGHT, &arg_name); + + builder.store(p, alloca, Align::EIGHT); + let val = builder.load(in_ty, alloca, Align::EIGHT); + let gep = builder.inbounds_gep(cx.type_f32(), val, &[i32_0]); + vals.push(val); + geps.push(gep); + } + + // Step 1) + unsafe { llvm::LLVMRustPositionBefore(builder.llbuilder, kernel_call) }; + builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT); + + let mapper_fn_ty = cx.type_func(&[cx.type_ptr()], cx.type_void()); + let register_lib_decl = declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty); + let unregister_lib_decl = declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty); + let init_ty = cx.type_func(&[], cx.type_void()); + let init_rtls_decl = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty); + + // call void @__tgt_register_lib(ptr noundef %6) + builder.call(mapper_fn_ty, register_lib_decl, &[tgt_bin_desc_alloca], None); + // call void @__tgt_init_all_rtls() + builder.call(init_ty, init_rtls_decl, &[], None); + + for i in 0..num_args { + let idx = cx.get_const_i32(i); + let gep1 = builder.inbounds_gep(ty, a1, &[i32_0, idx]); + builder.store(vals[i as usize], gep1, Align::EIGHT); + let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, idx]); + builder.store(geps[i as usize], gep2, Align::EIGHT); + let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, idx]); + // As mentioned above, we don't use Rust type information yet. So for now we will just + // assume that we have 1024 bytes, 256 f32 values. + // FIXME(offload): write an offload frontend and handle arbitrary types. + builder.store(cx.get_const_i64(1024), gep3, Align::EIGHT); + } + + // For now we have a very simplistic indexing scheme into our + // offload_{baseptrs,ptrs,sizes}. We will probably improve this along with our gpu frontend pr. + fn get_geps<'a, 'll>( + builder: &mut SBuilder<'a, 'll>, + cx: &'ll SimpleCx<'ll>, + ty: &'ll Type, + ty2: &'ll Type, + a1: &'ll Value, + a2: &'ll Value, + a4: &'ll Value, + ) -> (&'ll Value, &'ll Value, &'ll Value) { + let i32_0 = cx.get_const_i32(0); + + let gep1 = builder.inbounds_gep(ty, a1, &[i32_0, i32_0]); + let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, i32_0]); + let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, i32_0]); + (gep1, gep2, gep3) + } + + fn generate_mapper_call<'a, 'll>( + builder: &mut SBuilder<'a, 'll>, + cx: &'ll SimpleCx<'ll>, + geps: (&'ll Value, &'ll Value, &'ll Value), + o_type: &'ll Value, + fn_to_call: &'ll Value, + fn_ty: &'ll Type, + num_args: u64, + s_ident_t: &'ll Value, + ) { + let nullptr = cx.const_null(cx.type_ptr()); + let i64_max = cx.get_const_i64(u64::MAX); + let num_args = cx.get_const_i32(num_args); + let args = + vec![s_ident_t, i64_max, num_args, geps.0, geps.1, geps.2, o_type, nullptr, nullptr]; + builder.call(fn_ty, fn_to_call, &args, None); + } + + // Step 2) + let s_ident_t = generate_at_one(&cx); + let o = o_types[0]; + let geps = get_geps(&mut builder, &cx, ty, ty2, a1, a2, a4); + generate_mapper_call(&mut builder, &cx, geps, o, begin_mapper_decl, fn_ty, num_args, s_ident_t); + + // Step 3) + // Here we will add code for the actual kernel launches in a follow-up PR. 
+ // FIXME(offload): launch kernels + + // Step 4) + unsafe { llvm::LLVMRustPositionAfter(builder.llbuilder, kernel_call) }; + + let geps = get_geps(&mut builder, &cx, ty, ty2, a1, a2, a4); + generate_mapper_call(&mut builder, &cx, geps, o, end_mapper_decl, fn_ty, num_args, s_ident_t); + + builder.call(mapper_fn_ty, unregister_lib_decl, &[tgt_bin_desc_alloca], None); + + // With this we generated the following begin and end mappers. We could easily generate the + // update mapper in an update. + // call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 3, ptr %27, ptr %28, ptr %29, ptr @.offload_maptypes, ptr null, ptr null) + // call void @__tgt_target_data_update_mapper(ptr @1, i64 -1, i32 2, ptr %46, ptr %47, ptr %48, ptr @.offload_maptypes.1, ptr null, ptr null) + // call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 3, ptr %49, ptr %50, ptr %51, ptr @.offload_maptypes, ptr null, ptr null) +} diff --git a/compiler/rustc_codegen_llvm/src/callee.rs b/compiler/rustc_codegen_llvm/src/callee.rs index 5a3dd90ab24..791a71d73ae 100644 --- a/compiler/rustc_codegen_llvm/src/callee.rs +++ b/compiler/rustc_codegen_llvm/src/callee.rs @@ -103,7 +103,7 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t // This is a monomorphization of a generic function. if !(cx.tcx.sess.opts.share_generics() || tcx.codegen_instance_attrs(instance.def).inline - == rustc_attr_data_structures::InlineAttr::Never) + == rustc_hir::attrs::InlineAttr::Never) { // When not sharing generics, all instances are in the same // crate and have hidden visibility. diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs index f9ab96b5789..f29fefb66f0 100644 --- a/compiler/rustc_codegen_llvm/src/common.rs +++ b/compiler/rustc_codegen_llvm/src/common.rs @@ -118,6 +118,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> { r } } + + pub(crate) fn const_null(&self, t: &'ll Type) -> &'ll Value { + unsafe { llvm::LLVMConstNull(t) } + } } impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> { @@ -377,6 +381,11 @@ pub(crate) fn bytes_in_context<'ll>(llcx: &'ll llvm::Context, bytes: &[u8]) -> & } } +pub(crate) fn named_struct<'ll>(ty: &'ll Type, elts: &[&'ll Value]) -> &'ll Value { + let len = c_uint::try_from(elts.len()).expect("LLVMConstStructInContext elements len overflow"); + unsafe { llvm::LLVMConstNamedStruct(ty, elts.as_ptr(), len) } +} + fn struct_in_context<'ll>( llcx: &'ll llvm::Context, elts: &[&'ll Value], diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 6a23becaa96..ee77774c688 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -207,11 +207,16 @@ pub(crate) unsafe fn create_module<'ll>( // LLVM 21 updated the default layout on nvptx: https://github.com/llvm/llvm-project/pull/124961 target_data_layout = target_data_layout.replace("e-p6:32:32-i64", "e-i64"); } + if sess.target.arch == "amdgpu" { + // LLVM 21 adds the address width for address space 8. + // See https://github.com/llvm/llvm-project/pull/139419 + target_data_layout = target_data_layout.replace("p8:128:128:128:48", "p8:128:128") + } } // Ensure the data-layout values hardcoded remain the defaults. 
{ - let tm = crate::back::write::create_informational_target_machine(tcx.sess, false); + let tm = crate::back::write::create_informational_target_machine(sess, false); unsafe { llvm::LLVMRustSetDataLayoutFromTargetMachine(llmod, tm.raw()); } @@ -680,6 +685,22 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> { unsafe { llvm::LLVMConstInt(ty, val, llvm::False) } } + pub(crate) fn get_const_i64(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i64(), n) + } + + pub(crate) fn get_const_i32(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i32(), n) + } + + pub(crate) fn get_const_i16(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i16(), n) + } + + pub(crate) fn get_const_i8(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i8(), n) + } + pub(crate) fn get_function(&self, name: &str) -> Option<&'ll Value> { let name = SmallCStr::new(name); unsafe { llvm::LLVMGetNamedFunction((**self).borrow().llmod, name.as_ptr()) } diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index a9be833a643..8c9dfcfd18c 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -46,21 +46,17 @@ pub(crate) fn finalize(cx: &mut CodegenCx<'_, '_>) { debug!("Generating coverage map for CodegenUnit: `{}`", cx.codegen_unit.name()); // FIXME(#132395): Can this be none even when coverage is enabled? - let instances_used = match cx.coverage_cx { - Some(ref cx) => cx.instances_used.borrow(), - None => return, - }; + let Some(ref coverage_cx) = cx.coverage_cx else { return }; - let mut covfun_records = instances_used - .iter() - .copied() + let mut covfun_records = coverage_cx + .instances_used() + .into_iter() // Sort by symbol name, so that the global file table is built in an // order that doesn't depend on the stable-hash-based order in which // instances were visited during codegen. .sorted_by_cached_key(|&instance| tcx.symbol_name(instance).name) .filter_map(|instance| prepare_covfun_record(tcx, instance, true)) .collect::<Vec<_>>(); - drop(instances_used); // In a single designated CGU, also prepare covfun records for functions // in this crate that were instrumented for coverage, but are unused. diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs index 39a59560c9d..574463be7ff 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs @@ -39,7 +39,10 @@ impl Coords { /// or other expansions), and if it does happen then skipping a span or function is /// better than an ICE or `llvm-cov` failure that the user might have no way to avoid. pub(crate) fn make_coords(source_map: &SourceMap, file: &SourceFile, span: Span) -> Option<Coords> { - let span = ensure_non_empty_span(source_map, span)?; + if span.is_empty() { + debug_assert!(false, "can't make coords from empty span: {span:?}"); + return None; + } let lo = span.lo(); let hi = span.hi(); @@ -70,29 +73,6 @@ pub(crate) fn make_coords(source_map: &SourceMap, file: &SourceFile, span: Span) }) } -fn ensure_non_empty_span(source_map: &SourceMap, span: Span) -> Option<Span> { - if !span.is_empty() { - return Some(span); - } - - // The span is empty, so try to enlarge it to cover an adjacent '{' or '}'. 
- source_map - .span_to_source(span, |src, start, end| try { - // Adjusting span endpoints by `BytePos(1)` is normally a bug, - // but in this case we have specifically checked that the character - // we're skipping over is one of two specific ASCII characters, so - // adjusting by exactly 1 byte is correct. - if src.as_bytes().get(end).copied() == Some(b'{') { - Some(span.with_hi(span.hi() + BytePos(1))) - } else if start > 0 && src.as_bytes()[start - 1] == b'}' { - Some(span.with_lo(span.lo() - BytePos(1))) - } else { - None - } - }) - .ok()? -} - /// If `llvm-cov` sees a source region that is improperly ordered (end < start), /// it will immediately exit with a fatal error. To prevent that from happening, /// discard regions that are improperly ordered, or might be interpreted in a diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs index eefbd7cf6c4..119237abd6b 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs @@ -5,7 +5,7 @@ use rustc_abi::Size; use rustc_codegen_ssa::traits::{ BuilderMethods, ConstCodegenMethods, CoverageInfoBuilderMethods, MiscCodegenMethods, }; -use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; +use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; use rustc_middle::mir::coverage::CoverageKind; use rustc_middle::ty::Instance; use tracing::{debug, instrument}; @@ -20,9 +20,14 @@ mod mapgen; /// Extra per-CGU context/state needed for coverage instrumentation. pub(crate) struct CguCoverageContext<'ll, 'tcx> { - /// Coverage data for each instrumented function identified by DefId. - pub(crate) instances_used: RefCell<FxIndexSet<Instance<'tcx>>>, - pub(crate) pgo_func_name_var_map: RefCell<FxHashMap<Instance<'tcx>, &'ll llvm::Value>>, + /// Associates function instances with an LLVM global that holds the + /// function's symbol name, as needed by LLVM coverage intrinsics. + /// + /// Instances in this map are also considered "used" for the purposes of + /// emitting covfun records. Every covfun record holds a hash of its + /// symbol name, and `llvm-cov` will exit fatally if it can't resolve that + /// hash back to an entry in the binary's `__llvm_prf_names` linker section. + pub(crate) pgo_func_name_var_map: RefCell<FxIndexMap<Instance<'tcx>, &'ll llvm::Value>>, pub(crate) mcdc_condition_bitmap_map: RefCell<FxHashMap<Instance<'tcx>, Vec<&'ll llvm::Value>>>, covfun_section_name: OnceCell<CString>, @@ -31,7 +36,6 @@ pub(crate) struct CguCoverageContext<'ll, 'tcx> { impl<'ll, 'tcx> CguCoverageContext<'ll, 'tcx> { pub(crate) fn new() -> Self { Self { - instances_used: RefCell::<FxIndexSet<_>>::default(), pgo_func_name_var_map: Default::default(), mcdc_condition_bitmap_map: Default::default(), covfun_section_name: Default::default(), @@ -53,6 +57,14 @@ impl<'ll, 'tcx> CguCoverageContext<'ll, 'tcx> { .and_then(|bitmap_map| bitmap_map.get(decision_depth as usize)) .copied() // Dereference Option<&&Value> to Option<&Value> } + + /// Returns the list of instances considered "used" in this CGU, as + /// inferred from the keys of `pgo_func_name_var_map`. + pub(crate) fn instances_used(&self) -> Vec<Instance<'tcx>> { + // Collecting into a Vec is way easier than trying to juggle RefCell + // projections, and this should only run once per CGU anyway. 
+ self.pgo_func_name_var_map.borrow().keys().copied().collect::<Vec<_>>() + } } impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { @@ -78,7 +90,10 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { /// string, to hold the function name passed to LLVM intrinsic /// `instrprof.increment()`. The `Value` is only created once per instance. /// Multiple invocations with the same instance return the same `Value`. - fn get_pgo_func_name_var(&self, instance: Instance<'tcx>) -> &'ll llvm::Value { + /// + /// This has the side-effect of causing coverage codegen to consider this + /// function "used", making it eligible to emit an associated covfun record. + fn ensure_pgo_func_name_var(&self, instance: Instance<'tcx>) -> &'ll llvm::Value { debug!("getting pgo_func_name_var for instance={:?}", instance); let mut pgo_func_name_var_map = self.coverage_cx().pgo_func_name_var_map.borrow_mut(); pgo_func_name_var_map.entry(instance).or_insert_with(|| { @@ -102,7 +117,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { return; } - let fn_name = self.get_pgo_func_name_var(instance); + let fn_name = self.ensure_pgo_func_name_var(instance); let hash = self.const_u64(function_coverage_info.function_source_hash); let bitmap_bits = self.const_u32(function_coverage_info.mcdc_bitmap_bits as u32); self.mcdc_parameters(fn_name, hash, bitmap_bits); @@ -151,11 +166,6 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { return; }; - // Mark the instance as used in this CGU, for coverage purposes. - // This includes functions that were not partitioned into this CGU, - // but were MIR-inlined into one of this CGU's functions. - coverage_cx.instances_used.borrow_mut().insert(instance); - match *kind { CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!( "marker statement {kind:?} should have been removed by CleanupPostBorrowck" @@ -163,7 +173,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { CoverageKind::VirtualCounter { bcb } if let Some(&id) = ids_info.phys_counter_for_node.get(&bcb) => { - let fn_name = bx.get_pgo_func_name_var(instance); + let fn_name = bx.ensure_pgo_func_name_var(instance); let hash = bx.const_u64(function_coverage_info.function_source_hash); let num_counters = bx.const_u32(ids_info.num_counters); let index = bx.const_u32(id.as_u32()); @@ -193,7 +203,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { "bitmap index of the decision out of range" ); - let fn_name = bx.get_pgo_func_name_var(instance); + let fn_name = bx.ensure_pgo_func_name_var(instance); let hash = bx.const_u64(function_coverage_info.function_source_hash); let bitmap_index = bx.const_u32(bitmap_idx); bx.mcdc_tvbitmap_update(fn_name, hash, bitmap_index, cond_bitmap); diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs index 61555ac2f6f..bcfa0381cc1 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/gdb.rs @@ -1,9 +1,10 @@ // .debug_gdb_scripts binary section. 
-use rustc_attr_data_structures::{AttributeKind, find_attr}; use rustc_codegen_ssa::base::collect_debugger_visualizers_transitive; use rustc_codegen_ssa::traits::*; +use rustc_hir::attrs::AttributeKind; use rustc_hir::def_id::LOCAL_CRATE; +use rustc_hir::find_attr; use rustc_middle::bug; use rustc_middle::middle::debugger_visualizer::DebuggerVisualizerType; use rustc_session::config::{CrateType, DebugInfo}; diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs index 56fb12d3c22..d1502d2b1e6 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata/type_map.rs @@ -285,8 +285,8 @@ pub(super) fn build_type_with_children<'ll, 'tcx>( // Item(T), // } // ``` - let is_expanding_recursive = - debug_context(cx).adt_stack.borrow().iter().any(|(parent_def_id, parent_args)| { + let is_expanding_recursive = adt_def.is_enum() + && debug_context(cx).adt_stack.borrow().iter().any(|(parent_def_id, parent_args)| { if def_id == *parent_def_id { args.iter().zip(parent_args.iter()).any(|(arg, parent_arg)| { if let (Some(arg), Some(parent_arg)) = (arg.as_type(), parent_arg.as_type()) diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs index 5ca2505cec4..6cbf2dbf7d3 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs @@ -533,7 +533,7 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> { // First, let's see if this is a method within an inherent impl. Because // if yes, we want to make the result subroutine DIE a child of the // subroutine's self-type. - if let Some(impl_def_id) = cx.tcx.impl_of_method(instance.def_id()) { + if let Some(impl_def_id) = cx.tcx.impl_of_assoc(instance.def_id()) { // If the method does *not* belong to a trait, proceed if cx.tcx.trait_id_of_impl(impl_def_id).is_none() { let impl_self_ty = cx.tcx.instantiate_and_normalize_erasing_regions( diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs index eb75716d768..960a895a203 100644 --- a/compiler/rustc_codegen_llvm/src/declare.rs +++ b/compiler/rustc_codegen_llvm/src/declare.rs @@ -215,7 +215,9 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { llfn } +} +impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> { /// Declare a global with an intention to define it. /// /// Use this function when you intend to define a global. This function will @@ -234,13 +236,13 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { /// /// Use this function when you intend to define a global without a name. pub(crate) fn define_private_global(&self, ty: &'ll Type) -> &'ll Value { - unsafe { llvm::LLVMRustInsertPrivateGlobal(self.llmod, ty) } + unsafe { llvm::LLVMRustInsertPrivateGlobal(self.llmod(), ty) } } /// Gets declared value by name. 
     pub(crate) fn get_declared_value(&self, name: &str) -> Option<&'ll Value> {
         debug!("get_declared_value(name={:?})", name);
-        unsafe { llvm::LLVMRustGetNamedValue(self.llmod, name.as_c_char_ptr(), name.len()) }
+        unsafe { llvm::LLVMRustGetNamedValue(self.llmod(), name.as_c_char_ptr(), name.len()) }
     }
 
     /// Gets defined or externally defined (AvailableExternally linkage) value by
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs
index 31d49e86319..627b0c9ff3b 100644
--- a/compiler/rustc_codegen_llvm/src/errors.rs
+++ b/compiler/rustc_codegen_llvm/src/errors.rs
@@ -20,11 +20,6 @@ pub(crate) struct SymbolAlreadyDefined<'a> {
 #[diag(codegen_llvm_sanitizer_memtag_requires_mte)]
 pub(crate) struct SanitizerMemtagRequiresMte;
 
-#[derive(Diagnostic)]
-#[diag(codegen_llvm_dynamic_linking_with_lto)]
-#[note]
-pub(crate) struct DynamicLinkingWithLTO;
-
 pub(crate) struct ParseTargetMachineConfig<'a>(pub LlvmError<'a>);
 
 impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> {
@@ -42,21 +37,9 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> {
 pub(crate) struct AutoDiffWithoutEnable;
 
 #[derive(Diagnostic)]
-#[diag(codegen_llvm_lto_disallowed)]
-pub(crate) struct LtoDisallowed;
-
-#[derive(Diagnostic)]
-#[diag(codegen_llvm_lto_dylib)]
-pub(crate) struct LtoDylib;
-
-#[derive(Diagnostic)]
-#[diag(codegen_llvm_lto_proc_macro)]
-pub(crate) struct LtoProcMacro;
-
-#[derive(Diagnostic)]
 #[diag(codegen_llvm_lto_bitcode_from_rlib)]
 pub(crate) struct LtoBitcodeFromRlib {
-    pub llvm_err: String,
+    pub err: String,
 }
 
 #[derive(Diagnostic)]
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index fcc0d378f06..7b27e496986 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -382,26 +382,16 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                 let width = size.bits();
                 let llty = self.type_ix(width);
                 match name {
-                    sym::ctlz | sym::cttz => {
-                        let y = self.const_bool(false);
-                        let ret = self.call_intrinsic(
-                            format!("llvm.{name}"),
-                            &[llty],
-                            &[args[0].immediate(), y],
-                        );
-
-                        self.intcast(ret, result.layout.llvm_type(self), false)
-                    }
-                    sym::ctlz_nonzero => {
-                        let y = self.const_bool(true);
-                        let ret =
-                            self.call_intrinsic("llvm.ctlz", &[llty], &[args[0].immediate(), y]);
-                        self.intcast(ret, result.layout.llvm_type(self), false)
-                    }
-                    sym::cttz_nonzero => {
-                        let y = self.const_bool(true);
+                    sym::ctlz | sym::ctlz_nonzero | sym::cttz | sym::cttz_nonzero => {
+                        let y =
+                            self.const_bool(name == sym::ctlz_nonzero || name == sym::cttz_nonzero);
+                        let llvm_name = if name == sym::ctlz || name == sym::ctlz_nonzero {
+                            "llvm.ctlz"
+                        } else {
+                            "llvm.cttz"
+                        };
                         let ret =
-                            self.call_intrinsic("llvm.cttz", &[llty], &[args[0].immediate(), y]);
+                            self.call_intrinsic(llvm_name, &[llty], &[args[0].immediate(), y]);
                         self.intcast(ret, result.layout.llvm_type(self), false)
                     }
                     sym::ctpop => {
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 6db4e122ad6..ca84b6de8b1 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -22,6 +22,7 @@
 use std::any::Any;
 use std::ffi::CStr;
 use std::mem::ManuallyDrop;
+use std::path::PathBuf;
 
 use back::owned_target_machine::OwnedTargetMachine;
 use back::write::{create_informational_target_machine, create_target_machine};
@@ -167,20 +168,15 @@ impl WriteBackendMethods for LlvmCodegenBackend {
         let stats = llvm::build_string(|s| unsafe { llvm::LLVMRustPrintStatistics(s) }).unwrap();
         print!("{stats}");
     }
-    fn run_link(
-        cgcx: &CodegenContext<Self>,
-        dcx: DiagCtxtHandle<'_>,
-        modules: Vec<ModuleCodegen<Self::Module>>,
-    ) -> Result<ModuleCodegen<Self::Module>, FatalError> {
-        back::write::link(cgcx, dcx, modules)
-    }
     fn run_and_optimize_fat_lto(
         cgcx: &CodegenContext<Self>,
+        exported_symbols_for_lto: &[String],
+        each_linked_rlib_for_lto: &[PathBuf],
         modules: Vec<FatLtoInput<Self>>,
-        cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
         diff_fncs: Vec<AutoDiffItem>,
     ) -> Result<ModuleCodegen<Self::Module>, FatalError> {
-        let mut module = back::lto::run_fat(cgcx, modules, cached_modules)?;
+        let mut module =
+            back::lto::run_fat(cgcx, exported_symbols_for_lto, each_linked_rlib_for_lto, modules)?;
 
         if !diff_fncs.is_empty() {
             builder::autodiff::differentiate(&module, cgcx, diff_fncs)?;
@@ -194,10 +190,18 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     }
     fn run_thin_lto(
         cgcx: &CodegenContext<Self>,
+        exported_symbols_for_lto: &[String],
+        each_linked_rlib_for_lto: &[PathBuf],
         modules: Vec<(String, Self::ThinBuffer)>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
     ) -> Result<(Vec<ThinModule<Self>>, Vec<WorkProduct>), FatalError> {
-        back::lto::run_thin(cgcx, modules, cached_modules)
+        back::lto::run_thin(
+            cgcx,
+            exported_symbols_for_lto,
+            each_linked_rlib_for_lto,
+            modules,
+            cached_modules,
+        )
     }
     fn optimize(
         cgcx: &CodegenContext<Self>,
@@ -412,6 +416,20 @@ impl ModuleLlvm {
         }
     }
 
+    fn tm_from_cgcx(
+        cgcx: &CodegenContext<LlvmCodegenBackend>,
+        name: &str,
+        dcx: DiagCtxtHandle<'_>,
+    ) -> Result<OwnedTargetMachine, FatalError> {
+        let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, name);
+        match (cgcx.tm_factory)(tm_factory_config) {
+            Ok(m) => Ok(m),
+            Err(e) => {
+                return Err(dcx.emit_almost_fatal(ParseTargetMachineConfig(e)));
+            }
+        }
+    }
+
     fn parse(
         cgcx: &CodegenContext<LlvmCodegenBackend>,
         name: &CStr,
@@ -421,13 +439,7 @@ impl ModuleLlvm {
         unsafe {
             let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
             let llmod_raw = back::lto::parse_module(llcx, name, buffer, dcx)?;
-            let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, name.to_str().unwrap());
-            let tm = match (cgcx.tm_factory)(tm_factory_config) {
-                Ok(m) => m,
-                Err(e) => {
-                    return Err(dcx.emit_almost_fatal(ParseTargetMachineConfig(e)));
-                }
-            };
+            let tm = ModuleLlvm::tm_from_cgcx(cgcx, name.to_str().unwrap(), dcx)?;
 
             Ok(ModuleLlvm { llmod_raw, llcx, tm: ManuallyDrop::new(tm) })
         }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index c696b8d8ff2..56d756e52cc 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -4,7 +4,7 @@ use libc::{c_char, c_uint};
 
 use super::MetadataKindId;
 use super::ffi::{AttributeKind, BasicBlock, Metadata, Module, Type, Value};
-use crate::llvm::Bool;
+use crate::llvm::{Bool, Builder};
 
 #[link(name = "llvm-wrapper", kind = "static")]
 unsafe extern "C" {
@@ -31,6 +31,14 @@ unsafe extern "C" {
         index: c_uint,
         kind: AttributeKind,
     );
+    pub(crate) fn LLVMRustPositionBefore<'a>(B: &'a Builder<'_>, I: &'a Value);
+    pub(crate) fn LLVMRustPositionAfter<'a>(B: &'a Builder<'_>, I: &'a Value);
+    pub(crate) fn LLVMRustGetFunctionCall(
+        F: &Value,
+        name: *const c_char,
+        NameLen: libc::size_t,
+    ) -> Option<&Value>;
+
 }
 
 unsafe extern "C" {
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 0b1e632cbc4..2443194ff48 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -97,6 +97,16 @@ pub(crate) enum ModuleFlagMergeBehavior {
 
 // Consts for the LLVM CallConv type, pre-cast to usize.
 
+#[derive(Copy, Clone, PartialEq, Debug)]
+#[repr(C)]
+#[allow(dead_code)]
+pub(crate) enum TailCallKind {
+    None = 0,
+    Tail = 1,
+    MustTail = 2,
+    NoTail = 3,
+}
+
 /// LLVM CallingConv::ID. Should we wrap this?
 ///
 /// See <https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/IR/CallingConv.h>
@@ -1138,6 +1148,11 @@ unsafe extern "C" {
         Count: c_uint,
         Packed: Bool,
     ) -> &'a Value;
+    pub(crate) fn LLVMConstNamedStruct<'a>(
+        StructTy: &'a Type,
+        ConstantVals: *const &'a Value,
+        Count: c_uint,
+    ) -> &'a Value;
     pub(crate) fn LLVMConstVector(ScalarConstantVals: *const &Value, Size: c_uint) -> &Value;
 
     // Constant expressions
@@ -1181,6 +1196,7 @@ unsafe extern "C" {
     pub(crate) safe fn LLVMIsGlobalConstant(GlobalVar: &Value) -> Bool;
     pub(crate) safe fn LLVMSetGlobalConstant(GlobalVar: &Value, IsConstant: Bool);
     pub(crate) safe fn LLVMSetTailCall(CallInst: &Value, IsTailCall: Bool);
+    pub(crate) safe fn LLVMRustSetTailCallKind(CallInst: &Value, Kind: TailCallKind);
 
     // Operations on attributes
     pub(crate) fn LLVMCreateStringAttribute(
@@ -1217,6 +1233,8 @@ unsafe extern "C" {
     ) -> &'a BasicBlock;
 
     // Operations on instructions
+    pub(crate) fn LLVMGetInstructionParent(Inst: &Value) -> &BasicBlock;
+    pub(crate) fn LLVMGetCalledValue(CallInst: &Value) -> Option<&Value>;
     pub(crate) fn LLVMIsAInstruction(Val: &Value) -> Option<&Value>;
     pub(crate) fn LLVMGetFirstBasicBlock(Fn: &Value) -> &BasicBlock;
     pub(crate) fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>;
@@ -2425,6 +2443,7 @@ unsafe extern "C" {
         UseEmulatedTls: bool,
         ArgsCstrBuff: *const c_char,
         ArgsCstrBuffLen: usize,
+        UseWasmEH: bool,
     ) -> *mut TargetMachine;
 
     pub(crate) fn LLVMRustDisposeTargetMachine(T: *mut TargetMachine);
@@ -2556,6 +2575,7 @@ unsafe extern "C" {
 
     pub(crate) fn LLVMRustSetDataLayoutFromTargetMachine<'a>(M: &'a Module, TM: &'a TargetMachine);
 
+    pub(crate) fn LLVMRustPositionBuilderPastAllocas<'a>(B: &Builder<'a>, Fn: &'a Value);
     pub(crate) fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock);
 
     pub(crate) fn LLVMRustSetModulePICLevel(M: &Module);
@@ -2603,13 +2623,6 @@ unsafe extern "C" {
         len: usize,
         Identifier: *const c_char,
     ) -> Option<&Module>;
-    pub(crate) fn LLVMRustGetSliceFromObjectDataByName(
-        data: *const u8,
-        len: usize,
-        name: *const u8,
-        name_len: usize,
-        out_len: &mut usize,
-    ) -> *const u8;
 
     pub(crate) fn LLVMRustLinkerNew(M: &Module) -> &mut Linker<'_>;
     pub(crate) fn LLVMRustLinkerAdd(
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 0fb987bdf82..53899da183a 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -405,6 +405,8 @@ fn update_target_reliable_float_cfg(sess: &Session, cfg: &mut TargetConfig) {
         ("mips64" | "mips64r6", _) => false,
         // Selection bug <https://github.com/llvm/llvm-project/issues/95471>
         ("nvptx64", _) => false,
+        // Unsupported https://github.com/llvm/llvm-project/issues/121122
+        ("amdgpu", _) => false,
         // ABI bugs <https://github.com/rust-lang/rust/issues/125109> et al. (full
         // list at <https://github.com/rust-lang/rust/issues/116909>)
         ("powerpc" | "powerpc64", _) => false,
@@ -433,6 +435,9 @@ fn update_target_reliable_float_cfg(sess: &Session, cfg: &mut TargetConfig) {
         // This rules out anything that doesn't have `long double` = `binary128`; <= 32 bits
         // (ld is `f64`), anything other than Linux (Windows and MacOS use `f64`), and `x86`
        // (ld is 80-bit extended precision).
+        //
+        // musl does not implement the symbols required for f128 math at all.
+        _ if target_env == "musl" => false,
         ("x86_64", _) => false,
         (_, "linux") if target_pointer_width == 64 => true,
         _ => false,
