diff options
| author | Jacob Pratt <jacob@jhpratt.dev> | 2025-02-16 00:51:24 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-02-16 00:51:24 -0500 |
| commit | 20004d4bdd787b3e3b664827acafdd6c2ebfbce3 (patch) | |
| tree | 7a6ad25a790a57d08fa35f28c28d042267cc4d86 /compiler/rustc_codegen_ssa/src/back | |
| parent | aa37d799fc5e01b4cd5e700d67c10d184ae7d321 (diff) | |
| parent | 99ec64c34c65630d9a66befa2cbf2faa9478baa0 (diff) | |
| download | rust-20004d4bdd787b3e3b664827acafdd6c2ebfbce3.tar.gz rust-20004d4bdd787b3e3b664827acafdd6c2ebfbce3.zip | |
Rollup merge of #135909 - Flakebi:amdgpu-kd, r=jieyouxu,workingjubilee
Export kernel descriptor for amdgpu kernels

The host runtime (HIP or HSA) expects a kernel descriptor object for each kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol with the name of the kernel plus a `.kd` suffix.

Add it to the exported symbols in the linker script, so that it can be found.

For reference, the symbol is created here in LLVM: https://github.com/llvm/llvm-project/blob/d5457e4c1619e5dbeefd49841e284cbc24d35cb4/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp#L966

I wrote [a test](https://github.com/Flakebi/rust/commit/6a9115b121b48a8cd4aaf100551569dc70c6c704) for this as well; I’ll add that once the target is merged and working.

With this, all PRs to get working code for amdgpu are open (this + the target + the two patches adding addrspacecasts for alloca and global variables).

Tracking issue: #135024

r? `@workingjubilee`
Diffstat (limited to 'compiler/rustc_codegen_ssa/src/back')
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/back/linker.rs | 1 | ||||
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/back/symbol_export.rs | 92 |
2 files changed, 61 insertions, 32 deletions
diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs index 05d6ff35751..e3ace01c1eb 100644 --- a/compiler/rustc_codegen_ssa/src/back/linker.rs +++ b/compiler/rustc_codegen_ssa/src/back/linker.rs @@ -1776,6 +1776,7 @@ fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) - symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate( tcx, symbol, cnum, )); + symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum); } }); diff --git a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs index 1dbaffaa577..12ee872d531 100644 --- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs +++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs @@ -10,9 +10,10 @@ use rustc_middle::middle::exported_symbols::{ ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name, }; use rustc_middle::query::LocalCrate; -use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, TyCtxt}; +use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt}; use rustc_middle::util::Providers; use rustc_session::config::{CrateType, OomStrategy}; +use rustc_target::callconv::Conv; use rustc_target::spec::{SanitizerSet, TlsModel}; use tracing::debug; @@ -584,6 +585,42 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>( } } +fn calling_convention_for_symbol<'tcx>( + tcx: TyCtxt<'tcx>, + symbol: ExportedSymbol<'tcx>, +) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) { + let instance = match symbol { + ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _) + if tcx.is_static(def_id) => + { + None + } + ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)), + ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)), + // DropGlue always use the Rust calling convention and thus 
follow the target's default + // symbol decoration scheme. + ExportedSymbol::DropGlue(..) => None, + // AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the + // target's default symbol decoration scheme. + ExportedSymbol::AsyncDropGlueCtorShim(..) => None, + // NoDefId always follow the target's default symbol decoration scheme. + ExportedSymbol::NoDefId(..) => None, + // ThreadLocalShim always follow the target's default symbol decoration scheme. + ExportedSymbol::ThreadLocalShim(..) => None, + }; + + instance + .map(|i| { + tcx.fn_abi_of_instance( + ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())), + ) + .unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed")) + }) + .map(|fnabi| (fnabi.conv, &fnabi.args[..])) + // FIXME(workingjubilee): why don't we know the convention here? + .unwrap_or((Conv::Rust, &[])) +} + /// This is the symbol name of the given instance as seen by the linker. /// /// On 32-bit Windows symbols are decorated according to their calling conventions. @@ -592,8 +629,6 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>( symbol: ExportedSymbol<'tcx>, instantiating_crate: CrateNum, ) -> String { - use rustc_target::callconv::Conv; - let mut undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate); // thread local will not be a function call, @@ -617,35 +652,7 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>( _ => return undecorated, }; - let instance = match symbol { - ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _) - if tcx.is_static(def_id) => - { - None - } - ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)), - ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)), - // DropGlue always use the Rust calling convention and thus follow the target's default - // symbol decoration scheme. - ExportedSymbol::DropGlue(..) 
=> None, - // AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the - // target's default symbol decoration scheme. - ExportedSymbol::AsyncDropGlueCtorShim(..) => None, - // NoDefId always follow the target's default symbol decoration scheme. - ExportedSymbol::NoDefId(..) => None, - // ThreadLocalShim always follow the target's default symbol decoration scheme. - ExportedSymbol::ThreadLocalShim(..) => None, - }; - - let (conv, args) = instance - .map(|i| { - tcx.fn_abi_of_instance( - ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())), - ) - .unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed")) - }) - .map(|fnabi| (fnabi.conv, &fnabi.args[..])) - .unwrap_or((Conv::Rust, &[])); + let (conv, args) = calling_convention_for_symbol(tcx, symbol); // Decorate symbols with prefixes, suffixes and total number of bytes of arguments. // Reference: https://docs.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170 @@ -677,6 +684,27 @@ pub(crate) fn exporting_symbol_name_for_instance_in_crate<'tcx>( maybe_emutls_symbol_name(tcx, symbol, &undecorated).unwrap_or(undecorated) } +/// On amdhsa, `gpu-kernel` functions have an associated metadata object with a `.kd` suffix. +/// Add it to the symbols list for all kernel functions, so that it is exported in the linked +/// object. +pub(crate) fn extend_exported_symbols<'tcx>( + symbols: &mut Vec<String>, + tcx: TyCtxt<'tcx>, + symbol: ExportedSymbol<'tcx>, + instantiating_crate: CrateNum, +) { + let (conv, _) = calling_convention_for_symbol(tcx, symbol); + + if conv != Conv::GpuKernel || tcx.sess.target.os != "amdhsa" { + return; + } + + let undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate); + + // Add the symbol for the kernel descriptor (with .kd suffix) + symbols.push(format!("{undecorated}.kd")); +} + fn maybe_emutls_symbol_name<'tcx>( tcx: TyCtxt<'tcx>, symbol: ExportedSymbol<'tcx>, |
