Diffstat (limited to 'compiler/rustc_codegen_llvm/src')
 compiler/rustc_codegen_llvm/src/attributes.rs          | 51
 compiler/rustc_codegen_llvm/src/back/lto.rs            | 14
 compiler/rustc_codegen_llvm/src/builder.rs             | 29
 compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs |  2
 compiler/rustc_codegen_llvm/src/context.rs             | 14
 compiler/rustc_codegen_llvm/src/intrinsic.rs           | 26
 6 files changed, 97 insertions(+), 39 deletions(-)
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index 5affb26483a..573c51a9539 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -29,8 +29,18 @@ pub(crate) fn apply_to_callsite(callsite: &Value, idx: AttributePlace, attrs: &[
 }
 
 /// Get LLVM attribute for the provided inline heuristic.
-#[inline]
-fn inline_attr<'ll>(cx: &CodegenCx<'ll, '_>, inline: InlineAttr) -> Option<&'ll Attribute> {
+pub(crate) fn inline_attr<'ll, 'tcx>(
+    cx: &CodegenCx<'ll, 'tcx>,
+    instance: ty::Instance<'tcx>,
+) -> Option<&'ll Attribute> {
+    // `optnone` requires `noinline`
+    let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id());
+    let inline = match (codegen_fn_attrs.inline, &codegen_fn_attrs.optimize) {
+        (_, OptimizeAttr::DoNotOptimize) => InlineAttr::Never,
+        (InlineAttr::None, _) if instance.def.requires_inline(cx.tcx) => InlineAttr::Hint,
+        (inline, _) => inline,
+    };
+
     if !cx.tcx.sess.opts.unstable_opts.inline_llvm {
         // disable LLVM inlining
         return Some(AttributeKind::NoInline.create_attr(cx.llcx));
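Note: `inline_attr` now derives the effective inline level from the instance itself, so every caller (including the new callsite path in builder.rs below) shares the `optnone`-implies-`noinline` rule. A minimal standalone sketch of the decision order, using stand-in enums rather than the real rustc types (which have more variants):

    // Stand-in enums; the real InlineAttr/OptimizeAttr are rustc-internal.
    #[derive(Clone, Copy, PartialEq, Debug)]
    enum InlineAttr { None, Hint, Never }
    #[derive(Clone, Copy, PartialEq, Debug)]
    enum OptimizeAttr { Default, DoNotOptimize }

    // Decision order: `optnone` always forces `noinline`; otherwise a
    // function the compiler requires to inline gets at least a hint.
    fn effective_inline(
        inline: InlineAttr,
        optimize: OptimizeAttr,
        requires_inline: bool,
    ) -> InlineAttr {
        match (inline, optimize) {
            (_, OptimizeAttr::DoNotOptimize) => InlineAttr::Never,
            (InlineAttr::None, _) if requires_inline => InlineAttr::Hint,
            (inline, _) => inline,
        }
    }

    fn main() {
        // DoNotOptimize wins even over an explicit hint.
        assert_eq!(
            effective_inline(InlineAttr::Hint, OptimizeAttr::DoNotOptimize, false),
            InlineAttr::Never,
        );
    }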
@@ -286,6 +296,19 @@ pub(crate) fn tune_cpu_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribu
         .map(|tune_cpu| llvm::CreateAttrStringValue(cx.llcx, "tune-cpu", tune_cpu))
 }
 
+/// Get the `target-features` LLVM attribute.
+pub(crate) fn target_features_attr<'ll>(
+    cx: &CodegenCx<'ll, '_>,
+    function_features: Vec<String>,
+) -> Option<&'ll Attribute> {
+    let global_features = cx.tcx.global_backend_features(()).iter().map(String::as_str);
+    let function_features = function_features.iter().map(String::as_str);
+    let target_features =
+        global_features.chain(function_features).intersperse(",").collect::<String>();
+    (!target_features.is_empty())
+        .then(|| llvm::CreateAttrStringValue(cx.llcx, "target-features", &target_features))
+}
+
 /// Get the `NonLazyBind` LLVM attribute,
 /// if the codegen options allow skipping the PLT.
 pub(crate) fn non_lazy_bind_attr<'ll>(cx: &CodegenCx<'ll, '_>) -> Option<&'ll Attribute> {
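Note: the extracted `target_features_attr` joins the global backend features and the per-function features into one comma-separated string for LLVM's `target-features` attribute. rustc uses the unstable `Iterator::intersperse`; a stable-Rust sketch of the same joining, with made-up feature names:

    fn main() {
        let global_features = vec!["+avx".to_string(), "+avx2".to_string()];
        let function_features = vec!["+fma".to_string()];
        // Chain both lists and separate entries with commas, as the
        // attribute value expects.
        let target_features = global_features
            .iter()
            .chain(function_features.iter())
            .map(String::as_str)
            .collect::<Vec<_>>()
            .join(",");
        assert_eq!(target_features, "+avx,+avx2,+fma");
        // An empty string means the attribute is omitted entirely.
    }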
@@ -346,14 +369,6 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         OptimizeAttr::Speed => {}
     }
 
-    // `optnone` requires `noinline`
-    let inline = match (codegen_fn_attrs.inline, &codegen_fn_attrs.optimize) {
-        (_, OptimizeAttr::DoNotOptimize) => InlineAttr::Never,
-        (InlineAttr::None, _) if instance.def.requires_inline(cx.tcx) => InlineAttr::Hint,
-        (inline, _) => inline,
-    };
-    to_add.extend(inline_attr(cx, inline));
-
     if cx.sess().must_emit_unwind_tables() {
         to_add.push(uwtable_attr(cx.llcx, cx.sess().opts.unstable_opts.use_sync_unwind));
     }
@@ -488,6 +503,14 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
     let function_features =
         codegen_fn_attrs.target_features.iter().map(|f| f.name.as_str()).collect::<Vec<&str>>();
 
+    // Apply the inline attribute as usual if there are no user-defined
+    // target features; otherwise it is applied at the callsite instead.
+    if function_features.is_empty() {
+        if let Some(inline_attr) = inline_attr(cx, instance) {
+            to_add.push(inline_attr);
+        }
+    }
+
     let function_features = function_features
         .iter()
         // Convert to LLVMFeatures and filter out unavailable ones
@@ -513,13 +536,7 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         }
     }
 
-    let global_features = cx.tcx.global_backend_features(()).iter().map(|s| s.as_str());
-    let function_features = function_features.iter().map(|s| s.as_str());
-    let target_features: String =
-        global_features.chain(function_features).intersperse(",").collect();
-    if !target_features.is_empty() {
-        to_add.push(llvm::CreateAttrStringValue(cx.llcx, "target-features", &target_features));
-    }
+    to_add.extend(target_features_attr(cx, function_features));
 
     attributes::apply_to_llfn(llfn, Function, &to_add);
 }
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index fc38c4f3e51..ad2e722cfef 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -43,9 +43,7 @@ fn prepare_lto(
         .map(|symbol| CString::new(symbol.to_owned()).unwrap())
         .collect::<Vec<CString>>();
 
-    if cgcx.regular_module_config.instrument_coverage
-        || cgcx.regular_module_config.pgo_gen.enabled()
-    {
+    if cgcx.module_config.instrument_coverage || cgcx.module_config.pgo_gen.enabled() {
         // These are weak symbols that point to the profile version and the
         // profile name, which need to be treated as exported so LTO doesn't nix
         // them.
@@ -55,15 +53,15 @@ fn prepare_lto(
         symbols_below_threshold.extend(PROFILER_WEAK_SYMBOLS.iter().map(|&sym| sym.to_owned()));
     }
 
-    if cgcx.regular_module_config.sanitizer.contains(SanitizerSet::MEMORY) {
+    if cgcx.module_config.sanitizer.contains(SanitizerSet::MEMORY) {
         let mut msan_weak_symbols = Vec::new();
 
         // Similar to profiling, preserve weak msan symbol during LTO.
-        if cgcx.regular_module_config.sanitizer_recover.contains(SanitizerSet::MEMORY) {
+        if cgcx.module_config.sanitizer_recover.contains(SanitizerSet::MEMORY) {
             msan_weak_symbols.push(c"__msan_keep_going");
         }
 
-        if cgcx.regular_module_config.sanitizer_memory_track_origins != 0 {
+        if cgcx.module_config.sanitizer_memory_track_origins != 0 {
             msan_weak_symbols.push(c"__msan_track_origins");
         }
 
@@ -583,7 +581,7 @@ pub(crate) fn run_pass_manager(
     thin: bool,
 ) {
     let _timer = cgcx.prof.generic_activity_with_arg("LLVM_lto_optimize", &*module.name);
-    let config = cgcx.config(module.kind);
+    let config = &cgcx.module_config;
 
     // Now we have one massive module inside of llmod. Time to run the
     // LTO-specific optimization passes that LLVM provides.
@@ -745,7 +743,7 @@ pub(crate) fn optimize_thin_module(
     let module_llvm = ModuleLlvm::parse(cgcx, module_name, thin_module.data(), dcx);
     let mut module = ModuleCodegen::new_regular(thin_module.name(), module_llvm);
     // Given that the newly created module lacks a thinlto buffer for embedding, we need to re-add it here.
-    if cgcx.config(ModuleKind::Regular).embed_bitcode() {
+    if cgcx.module_config.embed_bitcode() {
         module.thin_lto_buffer = Some(thin_module.data().to_vec());
     }
     {
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
index 37379586d58..7d0691366e6 100644
--- a/compiler/rustc_codegen_llvm/src/builder.rs
+++ b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1392,7 +1392,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     fn call(
         &mut self,
         llty: &'ll Type,
-        fn_attrs: Option<&CodegenFnAttrs>,
+        fn_call_attrs: Option<&CodegenFnAttrs>,
         fn_abi: Option<&FnAbi<'tcx, Ty<'tcx>>>,
         llfn: &'ll Value,
         args: &[&'ll Value],
@@ -1409,10 +1409,10 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
         }
 
         // Emit CFI pointer type membership test
-        self.cfi_type_test(fn_attrs, fn_abi, instance, llfn);
+        self.cfi_type_test(fn_call_attrs, fn_abi, instance, llfn);
 
         // Emit KCFI operand bundle
-        let kcfi_bundle = self.kcfi_operand_bundle(fn_attrs, fn_abi, instance, llfn);
+        let kcfi_bundle = self.kcfi_operand_bundle(fn_call_attrs, fn_abi, instance, llfn);
         if let Some(kcfi_bundle) = kcfi_bundle.as_ref().map(|b| b.as_ref()) {
             bundles.push(kcfi_bundle);
         }
@@ -1429,6 +1429,29 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
                 c"".as_ptr(),
             )
         };
+
+        if let Some(instance) = instance {
+            // Attributes on the function definition being called
+            let fn_defn_attrs = self.cx.tcx.codegen_fn_attrs(instance.def_id());
+            if let Some(fn_call_attrs) = fn_call_attrs
+                && !fn_call_attrs.target_features.is_empty()
+                // If there is an inline attribute and the caller's target
+                // features are compatible with the callee's, add the attribute
+                // to the callsite; otherwise omit it to avoid soundness issues.
+                && let Some(inlining_rule) = attributes::inline_attr(&self.cx, instance)
+                && self.cx.tcx.is_target_feature_call_safe(
+                    &fn_call_attrs.target_features,
+                    &fn_defn_attrs.target_features,
+                )
+            {
+                attributes::apply_to_callsite(
+                    call,
+                    llvm::AttributePlace::Function,
+                    &[inlining_rule],
+                );
+            }
+        }
+
         if let Some(fn_abi) = fn_abi {
             fn_abi.apply_attrs_callsite(self, call);
         }
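Note: this is the other half of the attributes.rs change: a function with user-defined target features gets its inline attribute per callsite, and only when `is_target_feature_call_safe` confirms the caller's feature set covers the callee's. A hedged, x86_64-only illustration of the kind of caller/callee pair this admits (feature names chosen purely as an example):

    // The callee requires AVX2; inlining it into a caller that does not
    // guarantee AVX2 would be unsound, hence the per-callsite check.
    #[target_feature(enable = "avx2")]
    #[inline]
    unsafe fn callee() -> u32 {
        42
    }

    #[target_feature(enable = "avx2")]
    unsafe fn compatible_caller() -> u32 {
        // The caller's features cover the callee's, so the inline hint may
        // be attached to this callsite.
        unsafe { callee() }
    }

    fn main() {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was just checked at runtime.
            println!("{}", unsafe { compatible_caller() });
        }
    }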
diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs
index 1280ab1442a..0737a18384b 100644
--- a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs
@@ -193,7 +193,7 @@ fn gen_define_handling<'ll>(
     // reference) types.
     let num_ptr_types = types
         .iter()
-        .map(|&x| matches!(cx.type_kind(x), rustc_codegen_ssa::common::TypeKind::Pointer))
+        .filter(|&x| matches!(cx.type_kind(x), rustc_codegen_ssa::common::TypeKind::Pointer))
         .count();
 
     // We do not know their size anymore at this level, so hardcode a placeholder.
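Note: the gpu_offload.rs hunk is a genuine bug fix, not a rename: `.map(predicate).count()` counts every element (the `bool`s are merely passed through), while `.filter(predicate).count()` counts only the elements matching the predicate. A minimal demonstration:

    fn main() {
        let kinds = ["ptr", "int", "ptr", "float"];
        // map() yields one bool per element, so count() is the input length.
        let wrong = kinds.iter().map(|&k| k == "ptr").count();
        // filter() drops non-matching elements before counting.
        let right = kinds.iter().filter(|&&k| k == "ptr").count();
        assert_eq!(wrong, 4);
        assert_eq!(right, 2);
    }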
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 4fd6110ac4a..a69fa54a54a 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -217,6 +217,10 @@ pub(crate) unsafe fn create_module<'ll>(
             // LLVM 22.0 updated the default layout on avr: https://github.com/llvm/llvm-project/pull/153010
             target_data_layout = target_data_layout.replace("n8:16", "n8")
         }
+        if sess.target.arch == "nvptx64" {
+            // LLVM 22 updated the NVPTX layout to indicate 256-bit vector load/store: https://github.com/llvm/llvm-project/pull/155198
+            target_data_layout = target_data_layout.replace("-i256:256", "");
+        }
     }
 
     // Ensure the data-layout values hardcoded remain the defaults.
@@ -849,7 +853,7 @@ impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
     fn declare_c_main(&self, fn_type: Self::Type) -> Option<Self::Function> {
         let entry_name = self.sess().target.entry_name.as_ref();
         if self.get_declared_value(entry_name).is_none() {
-            Some(self.declare_entry_fn(
+            let llfn = self.declare_entry_fn(
                 entry_name,
                 llvm::CallConv::from_conv(
                     self.sess().target.entry_abi,
@@ -857,7 +861,13 @@ impl<'ll, 'tcx> MiscCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
                 ),
                 llvm::UnnamedAddr::Global,
                 fn_type,
-            ))
+            );
+            attributes::apply_to_llfn(
+                llfn,
+                llvm::AttributePlace::Function,
+                attributes::target_features_attr(self, vec![]).as_slice(),
+            );
+            Some(llfn)
         } else {
             // If the symbol already exists, it is an error: for example, the user wrote
             // #[no_mangle] extern "C" fn main(..) {..}
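Note: `declare_c_main` now tags the C entry function with the global `target-features` attribute as well. `target_features_attr` returns an `Option<&Attribute>`, and `Option::as_slice` (stable since Rust 1.75) turns that into the zero- or one-element slice `apply_to_llfn` expects, so targets with no global features apply nothing. The idiom in isolation, with a hypothetical `apply_all` standing in for the attribute-applying call:

    fn apply_all(attrs: &[&str]) {
        for attr in attrs {
            println!("applying {attr}");
        }
    }

    fn main() {
        let some_attr: Option<&str> = Some("target-features=+avx2");
        let no_attr: Option<&str> = None;
        // as_slice turns an Option into a 0- or 1-element slice, avoiding
        // an explicit `if let` around the slice-taking call.
        apply_all(some_attr.as_slice()); // applies one attribute
        apply_all(no_attr.as_slice());   // applies nothing
    }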
diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 49d3dedbeab..85f71f331a4 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -383,7 +383,9 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             | sym::rotate_left
             | sym::rotate_right
             | sym::saturating_add
-            | sym::saturating_sub => {
+            | sym::saturating_sub
+            | sym::unchecked_funnel_shl
+            | sym::unchecked_funnel_shr => {
                 let ty = args[0].layout.ty;
                 if !ty.is_integral() {
                     tcx.dcx().emit_err(InvalidMonomorphization::BasicIntegerType {
@@ -424,18 +426,26 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                     sym::bitreverse => {
                         self.call_intrinsic("llvm.bitreverse", &[llty], &[args[0].immediate()])
                     }
-                    sym::rotate_left | sym::rotate_right => {
-                        let is_left = name == sym::rotate_left;
-                        let val = args[0].immediate();
-                        let raw_shift = args[1].immediate();
-                        // rotate = funnel shift with first two args the same
+                    sym::rotate_left
+                    | sym::rotate_right
+                    | sym::unchecked_funnel_shl
+                    | sym::unchecked_funnel_shr => {
+                        let is_left = name == sym::rotate_left || name == sym::unchecked_funnel_shl;
+                        let lhs = args[0].immediate();
+                        let (rhs, raw_shift) =
+                            if name == sym::rotate_left || name == sym::rotate_right {
+                                // rotate = funnel shift with first two args the same
+                                (lhs, args[1].immediate())
+                            } else {
+                                (args[1].immediate(), args[2].immediate())
+                            };
                         let llvm_name = format!("llvm.fsh{}", if is_left { 'l' } else { 'r' });
 
                         // llvm expects shift to be the same type as the values, but rust
                         // always uses `u32`.
-                        let raw_shift = self.intcast(raw_shift, self.val_ty(val), false);
+                        let raw_shift = self.intcast(raw_shift, self.val_ty(lhs), false);
 
-                        self.call_intrinsic(llvm_name, &[llty], &[val, val, raw_shift])
+                        self.call_intrinsic(llvm_name, &[llty], &[lhs, rhs, raw_shift])
                     }
                     sym::saturating_add | sym::saturating_sub => {
                         let is_add = name == sym::saturating_add;