| author | bors <bors@rust-lang.org> | 2022-02-27 09:23:24 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2022-02-27 09:23:24 +0000 |
| commit | 2bd9656c80ff06412c833bd9a6e7118a81bb95fc (patch) | |
| tree | 8bba3eafb54303880322970d5ef4a1d0e83d92c1 | /compiler/rustc_codegen_llvm/src/attributes.rs |
| parent | 93230281562cd6b1b45eff070c473e3be20d9e72 (diff) | |
| parent | 0d0cc4f6a0cb48600f183c382986df1897bdb7dc (diff) | |
| download | rust-2bd9656c80ff06412c833bd9a6e7118a81bb95fc.tar.gz | rust-2bd9656c80ff06412c833bd9a6e7118a81bb95fc.zip |
Auto merge of #94221 - erikdesjardins:addattr, r=nikic
Add LLVM attributes in batches instead of individually

This should improve performance.

~r? `@ghost` (blocked on #94127)~
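The shape of the change is easiest to see in miniature. The sketch below is condensed from `from_fn_attrs` in the diff that follows; the wrapper function name is made up for illustration, and the types (`CodegenCx`, `Value`, `Attribute`, `AttributeKind`) and helpers (`create_attr`, `apply_to_llfn`) are rustc-internal, so it only compiles inside `rustc_codegen_llvm`:

```rust
use smallvec::SmallVec;

// Hypothetical wrapper, for illustration only; `cx` and `llfn` come from the
// surrounding codegen machinery in `rustc_codegen_llvm`.
fn add_fn_attrs_sketch<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
    // Before this PR: one call into LLVM per attribute, e.g.
    //     Attribute::Cold.apply_llfn(Function, llfn);
    //     Attribute::ReadOnly.apply_llfn(Function, llfn);

    // After: collect the attributes up front...
    let mut to_add = SmallVec::<[&'ll Attribute; 16]>::new();
    to_add.push(AttributeKind::Cold.create_attr(cx.llcx));
    to_add.push(AttributeKind::ReadOnly.create_attr(cx.llcx));

    // ...and hand them to LLVM in a single batched call (a no-op if empty).
    attributes::apply_to_llfn(llfn, Function, &to_add);
}
```

The PR text only claims a performance win; presumably the saving comes from crossing the Rust-to-LLVM FFI boundary once per function instead of once per attribute.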
Diffstat (limited to 'compiler/rustc_codegen_llvm/src/attributes.rs')
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/attributes.rs | 284 |
|---|---|---|

1 file changed, 145 insertions(+), 139 deletions(-)
```diff
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index f6d7221d4e9..13a41388f5e 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -7,53 +7,75 @@
 use rustc_codegen_ssa::traits::*;
 use rustc_data_structures::small_c_str::SmallCStr;
 use rustc_hir::def_id::DefId;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
-use rustc_middle::ty::layout::HasTyCtxt;
 use rustc_middle::ty::{self, TyCtxt};
 use rustc_session::config::OptLevel;
-use rustc_session::Session;
 use rustc_target::spec::abi::Abi;
 use rustc_target::spec::{FramePointer, SanitizerSet, StackProbeType, StackProtector};
+use smallvec::SmallVec;
 
 use crate::attributes;
 use crate::llvm::AttributePlace::Function;
-use crate::llvm::{self, Attribute};
+use crate::llvm::{self, Attribute, AttributeKind, AttributePlace};
 use crate::llvm_util;
 pub use rustc_attr::{InlineAttr, InstructionSetAttr, OptimizeAttr};
 
 use crate::context::CodegenCx;
 use crate::value::Value;
 
-/// Mark LLVM function to use provided inline heuristic.
+pub fn apply_to_llfn(llfn: &Value, idx: AttributePlace, attrs: &[&Attribute]) {
+    if !attrs.is_empty() {
+        llvm::AddFunctionAttributes(llfn, idx, attrs);
+    }
+}
+
+pub fn remove_from_llfn(llfn: &Value, idx: AttributePlace, attrs: &[AttributeKind]) {
+    if !attrs.is_empty() {
+        llvm::RemoveFunctionAttributes(llfn, idx, attrs);
+    }
+}
+
+pub fn apply_to_callsite(callsite: &Value, idx: AttributePlace, attrs: &[&Attribute]) {
+    if !attrs.is_empty() {
+        llvm::AddCallSiteAttributes(callsite, idx, attrs);
+    }
+}
+
+/// Get LLVM attribute for the provided inline heuristic.
 #[inline]
-fn inline<'ll>(cx: &CodegenCx<'ll, '_>, val: &'ll Value, inline: InlineAttr) {
-    use self::InlineAttr::*;
+fn inline_attr<'ll>(cx: &CodegenCx<'ll, '_>, inline: InlineAttr) -> Option<&'ll Attribute> {
     match inline {
-        Hint => Attribute::InlineHint.apply_llfn(Function, val),
-        Always => Attribute::AlwaysInline.apply_llfn(Function, val),
-        Never => {
-            if cx.tcx().sess.target.arch != "amdgpu" {
-                Attribute::NoInline.apply_llfn(Function, val);
+        InlineAttr::Hint => Some(AttributeKind::InlineHint.create_attr(cx.llcx)),
+        InlineAttr::Always => Some(AttributeKind::AlwaysInline.create_attr(cx.llcx)),
+        InlineAttr::Never => {
+            if cx.sess().target.arch != "amdgpu" {
+                Some(AttributeKind::NoInline.create_attr(cx.llcx))
+            } else {
+                None
             }
         }
-        None => {}
-    };
+        InlineAttr::None => None,
+    }
 }
 
-/// Apply LLVM sanitize attributes.
+/// Get LLVM sanitize attributes.
 #[inline]
-pub fn sanitize<'ll>(cx: &CodegenCx<'ll, '_>, no_sanitize: SanitizerSet, llfn: &'ll Value) {
+pub fn sanitize_attrs<'ll>(
+    cx: &CodegenCx<'ll, '_>,
+    no_sanitize: SanitizerSet,
+) -> SmallVec<[&'ll Attribute; 4]> {
+    let mut attrs = SmallVec::new();
     let enabled = cx.tcx.sess.opts.debugging_opts.sanitizer - no_sanitize;
     if enabled.contains(SanitizerSet::ADDRESS) {
-        llvm::Attribute::SanitizeAddress.apply_llfn(Function, llfn);
+        attrs.push(llvm::AttributeKind::SanitizeAddress.create_attr(cx.llcx));
     }
     if enabled.contains(SanitizerSet::MEMORY) {
-        llvm::Attribute::SanitizeMemory.apply_llfn(Function, llfn);
+        attrs.push(llvm::AttributeKind::SanitizeMemory.create_attr(cx.llcx));
     }
     if enabled.contains(SanitizerSet::THREAD) {
-        llvm::Attribute::SanitizeThread.apply_llfn(Function, llfn);
+        attrs.push(llvm::AttributeKind::SanitizeThread.create_attr(cx.llcx));
     }
     if enabled.contains(SanitizerSet::HWADDRESS) {
-        llvm::Attribute::SanitizeHWAddress.apply_llfn(Function, llfn);
+        attrs.push(llvm::AttributeKind::SanitizeHWAddress.create_attr(cx.llcx));
     }
     if enabled.contains(SanitizerSet::MEMTAG) {
         // Check to make sure the mte target feature is actually enabled.
@@ -66,26 +88,21 @@ pub fn sanitize<'ll>(cx: &CodegenCx<'ll, '_>, no_sanitize: SanitizerSet, llfn: &
             sess.err("`-Zsanitizer=memtag` requires `-Ctarget-feature=+mte`");
         }
 
-        llvm::Attribute::SanitizeMemTag.apply_llfn(Function, llfn);
+        attrs.push(llvm::AttributeKind::SanitizeMemTag.create_attr(cx.llcx));
     }
+    attrs
 }
 
 /// Tell LLVM to emit or not emit the information necessary to unwind the stack for the function.
 #[inline]
-pub fn emit_uwtable(val: &Value) {
+pub fn uwtable_attr(llcx: &llvm::Context) -> &Attribute {
     // NOTE: We should determine if we even need async unwind tables, as they
     // take have more overhead and if we can use sync unwind tables we
     // probably should.
-    llvm::EmitUWTableAttr(val, true);
-}
-
-/// Tell LLVM if this function should be 'naked', i.e., skip the epilogue and prologue.
-#[inline]
-fn naked(val: &Value, is_naked: bool) {
-    Attribute::Naked.toggle_llfn(Function, val, is_naked);
+    llvm::CreateUWTableAttr(llcx, true)
 }
 
-pub fn set_frame_pointer_type<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
+pub fn frame_pointer_type_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
     let mut fp = cx.sess().target.frame_pointer;
     // "mcount" function relies on stack pointer.
     // See <https://sourceware.org/binutils/docs/gprof/Implementation.html>.
@@ -96,19 +113,14 @@ pub fn set_frame_pointer_type<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
     let attr_value = match fp {
         FramePointer::Always => cstr!("all"),
         FramePointer::NonLeaf => cstr!("non-leaf"),
-        FramePointer::MayOmit => return,
+        FramePointer::MayOmit => return None,
     };
-    llvm::AddFunctionAttrStringValue(
-        llfn,
-        llvm::AttributePlace::Function,
-        cstr!("frame-pointer"),
-        attr_value,
-    );
+    Some(llvm::CreateAttrStringValue(cx.llcx, cstr!("frame-pointer"), attr_value))
 }
 
 /// Tell LLVM what instrument function to insert.
 #[inline]
-fn set_instrument_function<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
+fn instrument_function_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
     if cx.sess().instrument_mcount() {
         // Similar to `clang -pg` behavior. Handled by the
         // `post-inline-ee-instrument` LLVM pass.
@@ -117,16 +129,17 @@ fn set_instrument_function<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
         // See test/CodeGen/mcount.c in clang.
         let mcount_name = CString::new(cx.sess().target.mcount.as_str().as_bytes()).unwrap();
 
-        llvm::AddFunctionAttrStringValue(
-            llfn,
-            llvm::AttributePlace::Function,
+        Some(llvm::CreateAttrStringValue(
+            cx.llcx,
             cstr!("instrument-function-entry-inlined"),
             &mcount_name,
-        );
+        ))
+    } else {
+        None
     }
 }
 
-fn set_probestack<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
+fn probestack_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
     // Currently stack probes seem somewhat incompatible with the address
     // sanitizer and thread sanitizer. With asan we're already protected from
     // stack overflow anyway so we don't really need stack probes regardless.
@@ -137,107 +150,105 @@ fn set_probestack<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
         .sanitizer
         .intersects(SanitizerSet::ADDRESS | SanitizerSet::THREAD)
     {
-        return;
+        return None;
     }
 
     // probestack doesn't play nice either with `-C profile-generate`.
     if cx.sess().opts.cg.profile_generate.enabled() {
-        return;
+        return None;
     }
 
     // probestack doesn't play nice either with gcov profiling.
     if cx.sess().opts.debugging_opts.profile {
-        return;
+        return None;
     }
 
     let attr_value = match cx.sess().target.stack_probes {
-        StackProbeType::None => None,
+        StackProbeType::None => return None,
         // Request LLVM to generate the probes inline. If the given LLVM version does not support
        // this, no probe is generated at all (even if the attribute is specified).
-        StackProbeType::Inline => Some(cstr!("inline-asm")),
+        StackProbeType::Inline => cstr!("inline-asm"),
         // Flag our internal `__rust_probestack` function as the stack probe symbol.
         // This is defined in the `compiler-builtins` crate for each architecture.
-        StackProbeType::Call => Some(cstr!("__rust_probestack")),
+        StackProbeType::Call => cstr!("__rust_probestack"),
         // Pick from the two above based on the LLVM version.
         StackProbeType::InlineOrCall { min_llvm_version_for_inline } => {
             if llvm_util::get_version() < min_llvm_version_for_inline {
-                Some(cstr!("__rust_probestack"))
+                cstr!("__rust_probestack")
             } else {
-                Some(cstr!("inline-asm"))
+                cstr!("inline-asm")
             }
         }
     };
-    if let Some(attr_value) = attr_value {
-        llvm::AddFunctionAttrStringValue(
-            llfn,
-            llvm::AttributePlace::Function,
-            cstr!("probe-stack"),
-            attr_value,
-        );
-    }
+    Some(llvm::CreateAttrStringValue(cx.llcx, cstr!("probe-stack"), attr_value))
 }
 
-fn set_stackprotector<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
+fn stackprotector_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
     let sspattr = match cx.sess().stack_protector() {
-        StackProtector::None => return,
-        StackProtector::All => Attribute::StackProtectReq,
-        StackProtector::Strong => Attribute::StackProtectStrong,
-        StackProtector::Basic => Attribute::StackProtect,
+        StackProtector::None => return None,
+        StackProtector::All => AttributeKind::StackProtectReq,
+        StackProtector::Strong => AttributeKind::StackProtectStrong,
+        StackProtector::Basic => AttributeKind::StackProtect,
     };
-    sspattr.apply_llfn(Function, llfn)
+    Some(sspattr.create_attr(cx.llcx))
 }
 
-pub fn apply_target_cpu_attr<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
+pub fn target_cpu_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll Attribute {
     let target_cpu = SmallCStr::new(llvm_util::target_cpu(cx.tcx.sess));
-    llvm::AddFunctionAttrStringValue(
-        llfn,
-        llvm::AttributePlace::Function,
-        cstr!("target-cpu"),
-        target_cpu.as_c_str(),
-    );
+    llvm::CreateAttrStringValue(cx.llcx, cstr!("target-cpu"), target_cpu.as_c_str())
 }
 
-pub fn apply_tune_cpu_attr<'ll>(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
-    if let Some(tune) = llvm_util::tune_cpu(cx.tcx.sess) {
+pub fn tune_cpu_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
+    llvm_util::tune_cpu(cx.tcx.sess).map(|tune| {
         let tune_cpu = SmallCStr::new(tune);
-        llvm::AddFunctionAttrStringValue(
-            llfn,
-            llvm::AttributePlace::Function,
-            cstr!("tune-cpu"),
-            tune_cpu.as_c_str(),
-        );
-    }
+        llvm::CreateAttrStringValue(cx.llcx, cstr!("tune-cpu"), tune_cpu.as_c_str())
+    })
 }
 
-/// Sets the `NonLazyBind` LLVM attribute on a given function,
-/// assuming the codegen options allow skipping the PLT.
-pub fn non_lazy_bind<'ll>(sess: &Session, llfn: &'ll Value) {
+/// Get the `NonLazyBind` LLVM attribute,
+/// if the codegen options allow skipping the PLT.
+pub fn non_lazy_bind_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
     // Don't generate calls through PLT if it's not necessary
-    if !sess.needs_plt() {
-        Attribute::NonLazyBind.apply_llfn(Function, llfn);
+    if !cx.sess().needs_plt() {
+        Some(AttributeKind::NonLazyBind.create_attr(cx.llcx))
+    } else {
+        None
     }
 }
 
-pub(crate) fn default_optimisation_attrs<'ll>(sess: &Session, llfn: &'ll Value) {
-    match sess.opts.optimize {
+/// Returns attributes to remove and to add, respectively,
+/// to set the default optimizations attrs on a function.
+#[inline]
+pub(crate) fn default_optimisation_attrs<'ll>(
+    cx: &CodegenCx<'ll, '_>,
+) -> (
+    // Attributes to remove
+    SmallVec<[AttributeKind; 3]>,
+    // Attributes to add
+    SmallVec<[&'ll Attribute; 2]>,
+) {
+    let mut to_remove = SmallVec::new();
+    let mut to_add = SmallVec::new();
+    match cx.sess().opts.optimize {
         OptLevel::Size => {
-            llvm::Attribute::MinSize.unapply_llfn(Function, llfn);
-            llvm::Attribute::OptimizeForSize.apply_llfn(Function, llfn);
-            llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
+            to_remove.push(llvm::AttributeKind::MinSize);
+            to_add.push(llvm::AttributeKind::OptimizeForSize.create_attr(cx.llcx));
+            to_remove.push(llvm::AttributeKind::OptimizeNone);
         }
         OptLevel::SizeMin => {
-            llvm::Attribute::MinSize.apply_llfn(Function, llfn);
-            llvm::Attribute::OptimizeForSize.apply_llfn(Function, llfn);
-            llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
+            to_add.push(llvm::AttributeKind::MinSize.create_attr(cx.llcx));
+            to_add.push(llvm::AttributeKind::OptimizeForSize.create_attr(cx.llcx));
+            to_remove.push(llvm::AttributeKind::OptimizeNone);
        }
        OptLevel::No => {
-            llvm::Attribute::MinSize.unapply_llfn(Function, llfn);
-            llvm::Attribute::OptimizeForSize.unapply_llfn(Function, llfn);
-            llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
+            to_remove.push(llvm::AttributeKind::MinSize);
+            to_remove.push(llvm::AttributeKind::OptimizeForSize);
+            to_remove.push(llvm::AttributeKind::OptimizeNone);
        }
        _ => {}
    }
+    (to_remove, to_add)
 }
 
 /// Composite function which sets LLVM attributes for function depending on its AST (`#[attribute]`)
@@ -249,30 +260,35 @@ pub fn from_fn_attrs<'ll, 'tcx>(
 ) {
     let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id());
 
+    let mut to_remove = SmallVec::<[_; 4]>::new();
+    let mut to_add = SmallVec::<[_; 16]>::new();
+
     match codegen_fn_attrs.optimize {
         OptimizeAttr::None => {
-            default_optimisation_attrs(cx.tcx.sess, llfn);
+            let (to_remove_opt, to_add_opt) = default_optimisation_attrs(cx);
+            to_remove.extend(to_remove_opt);
+            to_add.extend(to_add_opt);
         }
         OptimizeAttr::Speed => {
-            llvm::Attribute::MinSize.unapply_llfn(Function, llfn);
-            llvm::Attribute::OptimizeForSize.unapply_llfn(Function, llfn);
-            llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
+            to_remove.push(llvm::AttributeKind::MinSize);
+            to_remove.push(llvm::AttributeKind::OptimizeForSize);
+            to_remove.push(llvm::AttributeKind::OptimizeNone);
        }
        OptimizeAttr::Size => {
-            llvm::Attribute::MinSize.apply_llfn(Function, llfn);
-            llvm::Attribute::OptimizeForSize.apply_llfn(Function, llfn);
-            llvm::Attribute::OptimizeNone.unapply_llfn(Function, llfn);
+            to_add.push(llvm::AttributeKind::MinSize.create_attr(cx.llcx));
+            to_add.push(llvm::AttributeKind::OptimizeForSize.create_attr(cx.llcx));
+            to_remove.push(llvm::AttributeKind::OptimizeNone);
        }
    }
 
-    let inline_attr = if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
+    let inline = if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
         InlineAttr::Never
     } else if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) {
         InlineAttr::Hint
     } else {
         codegen_fn_attrs.inline
     };
-    inline(cx, llfn, inline_attr);
+    to_add.extend(inline_attr(cx, inline));
 
     // The `uwtable` attribute according to LLVM is:
     //
@@ -291,52 +307,54 @@ pub fn from_fn_attrs<'ll, 'tcx>(
     // You can also find more info on why Windows always requires uwtables here:
     // https://bugzilla.mozilla.org/show_bug.cgi?id=1302078
     if cx.sess().must_emit_unwind_tables() {
-        attributes::emit_uwtable(llfn);
+        to_add.push(uwtable_attr(cx.llcx));
     }
 
     if cx.sess().opts.debugging_opts.profile_sample_use.is_some() {
-        llvm::AddFunctionAttrString(llfn, Function, cstr!("use-sample-profile"));
+        to_add.push(llvm::CreateAttrString(cx.llcx, cstr!("use-sample-profile")));
     }
 
     // FIXME: none of these three functions interact with source level attributes.
-    set_frame_pointer_type(cx, llfn);
-    set_instrument_function(cx, llfn);
-    set_probestack(cx, llfn);
-    set_stackprotector(cx, llfn);
+    to_add.extend(frame_pointer_type_attr(cx));
+    to_add.extend(instrument_function_attr(cx));
+    to_add.extend(probestack_attr(cx));
+    to_add.extend(stackprotector_attr(cx));
 
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
-        Attribute::Cold.apply_llfn(Function, llfn);
+        to_add.push(AttributeKind::Cold.create_attr(cx.llcx));
     }
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_RETURNS_TWICE) {
-        Attribute::ReturnsTwice.apply_llfn(Function, llfn);
+        to_add.push(AttributeKind::ReturnsTwice.create_attr(cx.llcx));
     }
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_PURE) {
-        Attribute::ReadOnly.apply_llfn(Function, llfn);
+        to_add.push(AttributeKind::ReadOnly.create_attr(cx.llcx));
     }
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::FFI_CONST) {
-        Attribute::ReadNone.apply_llfn(Function, llfn);
+        to_add.push(AttributeKind::ReadNone.create_attr(cx.llcx));
     }
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
-        naked(llfn, true);
+        to_add.push(AttributeKind::Naked.create_attr(cx.llcx));
     }
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::ALLOCATOR) {
-        Attribute::NoAlias.apply_llfn(llvm::AttributePlace::ReturnValue, llfn);
+        // apply to return place instead of function (unlike all other attributes applied in this function)
+        let no_alias = AttributeKind::NoAlias.create_attr(cx.llcx);
+        attributes::apply_to_llfn(llfn, AttributePlace::ReturnValue, &[no_alias]);
     }
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::CMSE_NONSECURE_ENTRY) {
-        llvm::AddFunctionAttrString(llfn, Function, cstr!("cmse_nonsecure_entry"));
+        to_add.push(llvm::CreateAttrString(cx.llcx, cstr!("cmse_nonsecure_entry")));
     }
     if let Some(align) = codegen_fn_attrs.alignment {
         llvm::set_alignment(llfn, align as usize);
     }
-    sanitize(cx, codegen_fn_attrs.no_sanitize, llfn);
+    to_add.extend(sanitize_attrs(cx, codegen_fn_attrs.no_sanitize));
 
     // Always annotate functions with the target-cpu they are compiled for.
     // Without this, ThinLTO won't inline Rust functions into Clang generated
     // functions (because Clang annotates functions this way too).
-    apply_target_cpu_attr(cx, llfn);
+    to_add.push(target_cpu_attr(cx));
     // tune-cpu is only conveyed through the attribute for our purpose.
     // The target doesn't care; the subtarget reads our attribute.
-    apply_tune_cpu_attr(cx, llfn);
+    to_add.extend(tune_cpu_attr(cx));
 
     let function_features =
         codegen_fn_attrs.target_features.iter().map(|f| f.as_str()).collect::<Vec<&str>>();
@@ -379,22 +397,12 @@ pub fn from_fn_attrs<'ll, 'tcx>(
     // If this function is an import from the environment but the wasm
     // import has a specific module/name, apply them here.
     if let Some(module) = wasm_import_module(cx.tcx, instance.def_id()) {
-        llvm::AddFunctionAttrStringValue(
-            llfn,
-            llvm::AttributePlace::Function,
-            cstr!("wasm-import-module"),
-            &module,
-        );
+        to_add.push(llvm::CreateAttrStringValue(cx.llcx, cstr!("wasm-import-module"), &module));
 
         let name =
             codegen_fn_attrs.link_name.unwrap_or_else(|| cx.tcx.item_name(instance.def_id()));
         let name = CString::new(name.as_str()).unwrap();
-        llvm::AddFunctionAttrStringValue(
-            llfn,
-            llvm::AttributePlace::Function,
-            cstr!("wasm-import-name"),
-            &name,
-        );
+        to_add.push(llvm::CreateAttrStringValue(cx.llcx, cstr!("wasm-import-name"), &name));
     }
 
     // The `"wasm"` abi on wasm targets automatically enables the
@@ -414,13 +422,11 @@ pub fn from_fn_attrs<'ll, 'tcx>(
         global_features.extend(function_features.into_iter());
         let features = global_features.join(",");
         let val = CString::new(features).unwrap();
-        llvm::AddFunctionAttrStringValue(
-            llfn,
-            llvm::AttributePlace::Function,
-            cstr!("target-features"),
-            &val,
-        );
+        to_add.push(llvm::CreateAttrStringValue(cx.llcx, cstr!("target-features"), &val));
     }
+
+    attributes::remove_from_llfn(llfn, Function, &to_remove);
+    attributes::apply_to_llfn(llfn, Function, &to_add);
 }
 
 fn wasm_import_module(tcx: TyCtxt<'_>, id: DefId) -> Option<CString> {
```
