about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2025-03-11 12:05:16 +0000
committerbors <bors@rust-lang.org>2025-03-11 12:05:16 +0000
commitebf0cf75d368c035f4c7e7246d203bd469ee4a51 (patch)
tree1c26d90ad88273c67bd0f169b122ebd9a4b88e84
parent705421b52239d7393b4738764b192179d5c139c2 (diff)
parentcee311454427bf5049f5493bb7c7d74df2abb369 (diff)
downloadrust-ebf0cf75d368c035f4c7e7246d203bd469ee4a51.tar.gz
rust-ebf0cf75d368c035f4c7e7246d203bd469ee4a51.zip
Auto merge of #137586 - nnethercote:SetImpliedBits, r=bjorn3
Speed up target feature computation

The LLVM backend calls `LLVMRustHasFeature` twice for every feature. In short-running rustc invocations, this accounts for a surprising amount of work.

r? `@bjorn3`
-rw-r--r--compiler/rustc_codegen_cranelift/src/lib.rs13
-rw-r--r--compiler/rustc_codegen_gcc/src/gcc_util.rs2
-rw-r--r--compiler/rustc_codegen_gcc/src/lib.rs68
-rw-r--r--compiler/rustc_codegen_llvm/src/lib.rs4
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm_util.rs115
-rw-r--r--compiler/rustc_codegen_ssa/src/target_features.rs2
-rw-r--r--compiler/rustc_codegen_ssa/src/traits/backend.rs9
-rw-r--r--compiler/rustc_interface/src/util.rs8
-rw-r--r--compiler/rustc_target/src/target_features.rs12
9 files changed, 120 insertions, 113 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs
index a3f43744875..06939beb374 100644
--- a/compiler/rustc_codegen_cranelift/src/lib.rs
+++ b/compiler/rustc_codegen_cranelift/src/lib.rs
@@ -176,13 +176,9 @@ impl CodegenBackend for CraneliftCodegenBackend {
         }
     }
 
-    fn target_features_cfg(
-        &self,
-        sess: &Session,
-        _allow_unstable: bool,
-    ) -> Vec<rustc_span::Symbol> {
+    fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
         // FIXME return the actually used target features. this is necessary for #[cfg(target_feature)]
-        if sess.target.arch == "x86_64" && sess.target.os != "none" {
+        let target_features = if sess.target.arch == "x86_64" && sess.target.os != "none" {
             // x86_64 mandates SSE2 support and rustc requires the x87 feature to be enabled
             vec![sym::fsxr, sym::sse, sym::sse2, Symbol::intern("x87")]
         } else if sess.target.arch == "aarch64" {
@@ -196,7 +192,10 @@ impl CodegenBackend for CraneliftCodegenBackend {
             }
         } else {
             vec![]
-        }
+        };
+        // FIXME do `unstable_target_features` properly
+        let unstable_target_features = target_features.clone();
+        (target_features, unstable_target_features)
     }
 
     fn print_version(&self) {
diff --git a/compiler/rustc_codegen_gcc/src/gcc_util.rs b/compiler/rustc_codegen_gcc/src/gcc_util.rs
index 4e8c8aaaf5c..6eae0c24f48 100644
--- a/compiler/rustc_codegen_gcc/src/gcc_util.rs
+++ b/compiler/rustc_codegen_gcc/src/gcc_util.rs
@@ -48,7 +48,7 @@ pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec<Stri
     for feature in sess.opts.cg.target_feature.split(',') {
         if let Some(feature) = feature.strip_prefix('+') {
             all_rust_features.extend(
-                UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
+                UnordSet::from(sess.target.implied_target_features(feature))
                     .to_sorted_stable_ord()
                     .iter()
                     .map(|&&s| (true, s)),
diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
index f090597f953..d478b2af46c 100644
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -259,8 +259,8 @@ impl CodegenBackend for GccCodegenBackend {
             .join(sess)
     }
 
-    fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
-        target_features_cfg(sess, allow_unstable, &self.target_info)
+    fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
+        target_features_cfg(sess, &self.target_info)
     }
 }
 
@@ -486,35 +486,41 @@ fn to_gcc_opt_level(optlevel: Option<OptLevel>) -> OptimizationLevel {
 /// Returns the features that should be set in `cfg(target_feature)`.
 fn target_features_cfg(
     sess: &Session,
-    allow_unstable: bool,
     target_info: &LockedTargetInfo,
-) -> Vec<Symbol> {
+) -> (Vec<Symbol>, Vec<Symbol>) {
     // TODO(antoyo): use global_gcc_features.
-    sess.target
-        .rust_target_features()
-        .iter()
-        .filter_map(|&(feature, gate, _)| {
-            if allow_unstable
-                || (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
-            {
-                Some(feature)
-            } else {
-                None
-            }
-        })
-        .filter(|feature| {
-            // TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
-            if *feature == "neon" {
-                return false;
-            }
-            target_info.cpu_supports(feature)
-            /*
-              adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
-              avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
-              bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
-              sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
-            */
-        })
-        .map(Symbol::intern)
-        .collect()
+    let f = |allow_unstable| {
+        sess.target
+            .rust_target_features()
+            .iter()
+            .filter_map(|&(feature, gate, _)| {
+                if allow_unstable
+                    || (gate.in_cfg()
+                        && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
+                {
+                    Some(feature)
+                } else {
+                    None
+                }
+            })
+            .filter(|feature| {
+                // TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
+                if *feature == "neon" {
+                    return false;
+                }
+                target_info.cpu_supports(feature)
+                /*
+                  adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
+                  avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
+                  bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
+                  sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
+                */
+            })
+            .map(Symbol::intern)
+            .collect()
+    };
+
+    let target_features = f(false);
+    let unstable_target_features = f(true);
+    (target_features, unstable_target_features)
 }
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index e51d4852db2..dec93bbccc3 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -341,8 +341,8 @@ impl CodegenBackend for LlvmCodegenBackend {
         llvm_util::print_version();
     }
 
-    fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
-        target_features_cfg(sess, allow_unstable)
+    fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
+        target_features_cfg(sess)
     }
 
     fn codegen_crate<'tcx>(
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 5cc4f4ab9e6..4a166b0872d 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -306,45 +306,44 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
 /// Must express features in the way Rust understands them.
 ///
 /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
-pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
-    let mut features: FxHashSet<Symbol> = Default::default();
-
+pub(crate) fn target_features_cfg(sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
     // Add base features for the target.
     // We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below.
     // The reason is that if LLVM considers a feature implied but we do not, we don't want that to
     // show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of
-    // the target CPU, that is still expanded to target features (with all their implied features) by
-    // LLVM.
+    // the target CPU, that is still expanded to target features (with all their implied features)
+    // by LLVM.
     let target_machine = create_informational_target_machine(sess, true);
-    // Compute which of the known target features are enabled in the 'base' target machine.
-    // We only consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
-    features.extend(
-        sess.target
-            .rust_target_features()
-            .iter()
-            .filter(|(feature, _, _)| {
-                // skip checking special features, as LLVM may not understand them
-                if RUSTC_SPECIAL_FEATURES.contains(feature) {
-                    return true;
-                }
-                // check that all features in a given smallvec are enabled
-                if let Some(feat) = to_llvm_features(sess, feature) {
-                    for llvm_feature in feat {
-                        let cstr = SmallCStr::new(llvm_feature);
-                        if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) }
-                        {
-                            return false;
-                        }
+    // Compute which of the known target features are enabled in the 'base' target machine. We only
+    // consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
+    let mut features: FxHashSet<Symbol> = sess
+        .target
+        .rust_target_features()
+        .iter()
+        .filter(|(feature, _, _)| {
+            // skip checking special features, as LLVM may not understand them
+            if RUSTC_SPECIAL_FEATURES.contains(feature) {
+                return true;
+            }
+            if let Some(feat) = to_llvm_features(sess, feature) {
+                for llvm_feature in feat {
+                    let cstr = SmallCStr::new(llvm_feature);
+                    // `LLVMRustHasFeature` is moderately expensive. On targets with many
+                    // features (e.g. x86) these calls take a non-trivial fraction of runtime
+                    // when compiling very small programs.
+                    if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } {
+                        return false;
                     }
-                    true
-                } else {
-                    false
                 }
-            })
-            .map(|(feature, _, _)| Symbol::intern(feature)),
-    );
+                true
+            } else {
+                false
+            }
+        })
+        .map(|(feature, _, _)| Symbol::intern(feature))
+        .collect();
 
-    // Add enabled features
+    // Add enabled and remove disabled features.
     for (enabled, feature) in
         sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() {
             Some('+') => Some((true, Symbol::intern(&s[1..]))),
@@ -360,7 +359,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
             #[allow(rustc::potential_query_instability)]
             features.extend(
                 sess.target
-                    .implied_target_features(std::iter::once(feature.as_str()))
+                    .implied_target_features(feature.as_str())
                     .iter()
                     .map(|s| Symbol::intern(s)),
             );
@@ -371,11 +370,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
             // `features.contains` below.
             #[allow(rustc::potential_query_instability)]
             features.retain(|f| {
-                if sess
-                    .target
-                    .implied_target_features(std::iter::once(f.as_str()))
-                    .contains(&feature.as_str())
-                {
+                if sess.target.implied_target_features(f.as_str()).contains(&feature.as_str()) {
                     // If `f` if implies `feature`, then `!feature` implies `!f`, so we have to
                     // remove `f`. (This is the standard logical contraposition principle.)
                     false
@@ -387,25 +382,31 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
         }
     }
 
-    // Filter enabled features based on feature gates
-    sess.target
-        .rust_target_features()
-        .iter()
-        .filter_map(|(feature, gate, _)| {
-            // The `allow_unstable` set is used by rustc internally to determined which target
-            // features are truly available, so we want to return even perma-unstable "forbidden"
-            // features.
-            if allow_unstable
-                || (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
-            {
-                Some(*feature)
-            } else {
-                None
-            }
-        })
-        .filter(|feature| features.contains(&Symbol::intern(feature)))
-        .map(|feature| Symbol::intern(feature))
-        .collect()
+    // Filter enabled features based on feature gates.
+    let f = |allow_unstable| {
+        sess.target
+            .rust_target_features()
+            .iter()
+            .filter_map(|(feature, gate, _)| {
+                // The `allow_unstable` set is used by rustc internally to determined which target
+                // features are truly available, so we want to return even perma-unstable
+                // "forbidden" features.
+                if allow_unstable
+                    || (gate.in_cfg()
+                        && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
+                {
+                    Some(Symbol::intern(feature))
+                } else {
+                    None
+                }
+            })
+            .filter(|feature| features.contains(&feature))
+            .collect()
+    };
+
+    let target_features = f(false);
+    let unstable_target_features = f(true);
+    (target_features, unstable_target_features)
 }
 
 pub(crate) fn print_version() {
@@ -682,7 +683,7 @@ pub(crate) fn global_llvm_features(
         for feature in sess.opts.cg.target_feature.split(',') {
             if let Some(feature) = feature.strip_prefix('+') {
                 all_rust_features.extend(
-                    UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
+                    UnordSet::from(sess.target.implied_target_features(feature))
                         .to_sorted_stable_ord()
                         .iter()
                         .map(|&&s| (true, s)),
diff --git a/compiler/rustc_codegen_ssa/src/target_features.rs b/compiler/rustc_codegen_ssa/src/target_features.rs
index 95a5e96fe46..8058cd1b178 100644
--- a/compiler/rustc_codegen_ssa/src/target_features.rs
+++ b/compiler/rustc_codegen_ssa/src/target_features.rs
@@ -190,7 +190,7 @@ pub(crate) fn provide(providers: &mut Providers) {
         },
         implied_target_features: |tcx, feature: Symbol| {
             let feature = feature.as_str();
-            UnordSet::from(tcx.sess.target.implied_target_features(std::iter::once(feature)))
+            UnordSet::from(tcx.sess.target.implied_target_features(feature))
                 .into_sorted_stable_ord()
                 .into_iter()
                 .map(|s| Symbol::intern(s))
diff --git a/compiler/rustc_codegen_ssa/src/traits/backend.rs b/compiler/rustc_codegen_ssa/src/traits/backend.rs
index ebcf118b903..65fd843e7a5 100644
--- a/compiler/rustc_codegen_ssa/src/traits/backend.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/backend.rs
@@ -45,10 +45,13 @@ pub trait CodegenBackend {
 
     fn print(&self, _req: &PrintRequest, _out: &mut String, _sess: &Session) {}
 
-    /// Returns the features that should be set in `cfg(target_features)`.
+    /// Returns two feature sets:
+    /// - The first has the features that should be set in `cfg(target_features)`.
+    /// - The second is like the first, but also includes unstable features.
+    ///
     /// RUSTC_SPECIFIC_FEATURES should be skipped here, those are handled outside codegen.
-    fn target_features_cfg(&self, _sess: &Session, _allow_unstable: bool) -> Vec<Symbol> {
-        vec![]
+    fn target_features_cfg(&self, _sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
+        (vec![], vec![])
     }
 
     fn print_passes(&self) {}
diff --git a/compiler/rustc_interface/src/util.rs b/compiler/rustc_interface/src/util.rs
index bc2aae7cd87..5cccab893bb 100644
--- a/compiler/rustc_interface/src/util.rs
+++ b/compiler/rustc_interface/src/util.rs
@@ -39,11 +39,11 @@ pub(crate) fn add_configuration(
 ) {
     let tf = sym::target_feature;
 
-    let unstable_target_features = codegen_backend.target_features_cfg(sess, true);
-    sess.unstable_target_features.extend(unstable_target_features.iter().cloned());
+    let (target_features, unstable_target_features) = codegen_backend.target_features_cfg(sess);
 
-    let target_features = codegen_backend.target_features_cfg(sess, false);
-    sess.target_features.extend(target_features.iter().cloned());
+    sess.unstable_target_features.extend(unstable_target_features.iter().copied());
+
+    sess.target_features.extend(target_features.iter().copied());
 
     cfg.extend(target_features.into_iter().map(|feat| (tf, Some(feat))));
 
diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs
index d05466bb484..6d3b6608ea2 100644
--- a/compiler/rustc_target/src/target_features.rs
+++ b/compiler/rustc_target/src/target_features.rs
@@ -768,17 +768,15 @@ impl Target {
         }
     }
 
-    pub fn implied_target_features<'a>(
-        &self,
-        base_features: impl Iterator<Item = &'a str>,
-    ) -> FxHashSet<&'a str> {
+    // Note: the returned set includes `base_feature`.
+    pub fn implied_target_features<'a>(&self, base_feature: &'a str) -> FxHashSet<&'a str> {
         let implied_features =
             self.rust_target_features().iter().map(|(f, _, i)| (f, i)).collect::<FxHashMap<_, _>>();
 
-        // implied target features have their own implied target features, so we traverse the
-        // map until there are no more features to add
+        // Implied target features have their own implied target features, so we traverse the
+        // map until there are no more features to add.
         let mut features = FxHashSet::default();
-        let mut new_features = base_features.collect::<Vec<&str>>();
+        let mut new_features = vec![base_feature];
         while let Some(new_feature) = new_features.pop() {
             if features.insert(new_feature) {
                 if let Some(implied_features) = implied_features.get(&new_feature) {