diff options
| author | bors <bors@rust-lang.org> | 2025-03-11 12:05:16 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2025-03-11 12:05:16 +0000 |
| commit | ebf0cf75d368c035f4c7e7246d203bd469ee4a51 (patch) | |
| tree | 1c26d90ad88273c67bd0f169b122ebd9a4b88e84 | |
| parent | 705421b52239d7393b4738764b192179d5c139c2 (diff) | |
| parent | cee311454427bf5049f5493bb7c7d74df2abb369 (diff) | |
| download | rust-ebf0cf75d368c035f4c7e7246d203bd469ee4a51.tar.gz rust-ebf0cf75d368c035f4c7e7246d203bd469ee4a51.zip | |
Auto merge of #137586 - nnethercote:SetImpliedBits, r=bjorn3
Speed up target feature computation. The LLVM backend calls `LLVMRustHasFeature` twice for every feature. In short-running rustc invocations, this accounts for a surprising amount of work. r? `@bjorn3`
| -rw-r--r-- | compiler/rustc_codegen_cranelift/src/lib.rs | 13 | ||||
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/gcc_util.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_codegen_gcc/src/lib.rs | 68 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/lib.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/llvm_util.rs | 115 | ||||
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/target_features.rs | 2 | ||||
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/traits/backend.rs | 9 | ||||
| -rw-r--r-- | compiler/rustc_interface/src/util.rs | 8 | ||||
| -rw-r--r-- | compiler/rustc_target/src/target_features.rs | 12 |
9 files changed, 120 insertions, 113 deletions
diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs index a3f43744875..06939beb374 100644 --- a/compiler/rustc_codegen_cranelift/src/lib.rs +++ b/compiler/rustc_codegen_cranelift/src/lib.rs @@ -176,13 +176,9 @@ impl CodegenBackend for CraneliftCodegenBackend { } } - fn target_features_cfg( - &self, - sess: &Session, - _allow_unstable: bool, - ) -> Vec<rustc_span::Symbol> { + fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) { // FIXME return the actually used target features. this is necessary for #[cfg(target_feature)] - if sess.target.arch == "x86_64" && sess.target.os != "none" { + let target_features = if sess.target.arch == "x86_64" && sess.target.os != "none" { // x86_64 mandates SSE2 support and rustc requires the x87 feature to be enabled vec![sym::fsxr, sym::sse, sym::sse2, Symbol::intern("x87")] } else if sess.target.arch == "aarch64" { @@ -196,7 +192,10 @@ impl CodegenBackend for CraneliftCodegenBackend { } } else { vec![] - } + }; + // FIXME do `unstable_target_features` properly + let unstable_target_features = target_features.clone(); + (target_features, unstable_target_features) } fn print_version(&self) { diff --git a/compiler/rustc_codegen_gcc/src/gcc_util.rs b/compiler/rustc_codegen_gcc/src/gcc_util.rs index 4e8c8aaaf5c..6eae0c24f48 100644 --- a/compiler/rustc_codegen_gcc/src/gcc_util.rs +++ b/compiler/rustc_codegen_gcc/src/gcc_util.rs @@ -48,7 +48,7 @@ pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec<Stri for feature in sess.opts.cg.target_feature.split(',') { if let Some(feature) = feature.strip_prefix('+') { all_rust_features.extend( - UnordSet::from(sess.target.implied_target_features(std::iter::once(feature))) + UnordSet::from(sess.target.implied_target_features(feature)) .to_sorted_stable_ord() .iter() .map(|&&s| (true, s)), diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs index 
f090597f953..d478b2af46c 100644 --- a/compiler/rustc_codegen_gcc/src/lib.rs +++ b/compiler/rustc_codegen_gcc/src/lib.rs @@ -259,8 +259,8 @@ impl CodegenBackend for GccCodegenBackend { .join(sess) } - fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> { - target_features_cfg(sess, allow_unstable, &self.target_info) + fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) { + target_features_cfg(sess, &self.target_info) } } @@ -486,35 +486,41 @@ fn to_gcc_opt_level(optlevel: Option<OptLevel>) -> OptimizationLevel { /// Returns the features that should be set in `cfg(target_feature)`. fn target_features_cfg( sess: &Session, - allow_unstable: bool, target_info: &LockedTargetInfo, -) -> Vec<Symbol> { +) -> (Vec<Symbol>, Vec<Symbol>) { // TODO(antoyo): use global_gcc_features. - sess.target - .rust_target_features() - .iter() - .filter_map(|&(feature, gate, _)| { - if allow_unstable - || (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none())) - { - Some(feature) - } else { - None - } - }) - .filter(|feature| { - // TODO: we disable Neon for now since we don't support the LLVM intrinsics for it. 
- if *feature == "neon" { - return false; - } - target_info.cpu_supports(feature) - /* - adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma, - avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq, - bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm, - sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves - */ - }) - .map(Symbol::intern) - .collect() + let f = |allow_unstable| { + sess.target + .rust_target_features() + .iter() + .filter_map(|&(feature, gate, _)| { + if allow_unstable + || (gate.in_cfg() + && (sess.is_nightly_build() || gate.requires_nightly().is_none())) + { + Some(feature) + } else { + None + } + }) + .filter(|feature| { + // TODO: we disable Neon for now since we don't support the LLVM intrinsics for it. + if *feature == "neon" { + return false; + } + target_info.cpu_supports(feature) + /* + adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma, + avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq, + bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm, + sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves + */ + }) + .map(Symbol::intern) + .collect() + }; + + let target_features = f(false); + let unstable_target_features = f(true); + (target_features, unstable_target_features) } diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index e51d4852db2..dec93bbccc3 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -341,8 +341,8 @@ impl CodegenBackend for LlvmCodegenBackend { llvm_util::print_version(); } - fn target_features_cfg(&self, 
sess: &Session, allow_unstable: bool) -> Vec<Symbol> { - target_features_cfg(sess, allow_unstable) + fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) { + target_features_cfg(sess) } fn codegen_crate<'tcx>( diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index 5cc4f4ab9e6..4a166b0872d 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -306,45 +306,44 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea /// Must express features in the way Rust understands them. /// /// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen. -pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> { - let mut features: FxHashSet<Symbol> = Default::default(); - +pub(crate) fn target_features_cfg(sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) { // Add base features for the target. // We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below. // The reason is that if LLVM considers a feature implied but we do not, we don't want that to // show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of - // the target CPU, that is still expanded to target features (with all their implied features) by - // LLVM. + // the target CPU, that is still expanded to target features (with all their implied features) + // by LLVM. let target_machine = create_informational_target_machine(sess, true); - // Compute which of the known target features are enabled in the 'base' target machine. - // We only consider "supported" features; "forbidden" features are not reflected in `cfg` as of now. 
- features.extend( - sess.target - .rust_target_features() - .iter() - .filter(|(feature, _, _)| { - // skip checking special features, as LLVM may not understand them - if RUSTC_SPECIAL_FEATURES.contains(feature) { - return true; - } - // check that all features in a given smallvec are enabled - if let Some(feat) = to_llvm_features(sess, feature) { - for llvm_feature in feat { - let cstr = SmallCStr::new(llvm_feature); - if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } - { - return false; - } + // Compute which of the known target features are enabled in the 'base' target machine. We only + // consider "supported" features; "forbidden" features are not reflected in `cfg` as of now. + let mut features: FxHashSet<Symbol> = sess + .target + .rust_target_features() + .iter() + .filter(|(feature, _, _)| { + // skip checking special features, as LLVM may not understand them + if RUSTC_SPECIAL_FEATURES.contains(feature) { + return true; + } + if let Some(feat) = to_llvm_features(sess, feature) { + for llvm_feature in feat { + let cstr = SmallCStr::new(llvm_feature); + // `LLVMRustHasFeature` is moderately expensive. On targets with many + // features (e.g. x86) these calls take a non-trivial fraction of runtime + // when compiling very small programs. + if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } { + return false; } - true - } else { - false } - }) - .map(|(feature, _, _)| Symbol::intern(feature)), - ); + true + } else { + false + } + }) + .map(|(feature, _, _)| Symbol::intern(feature)) + .collect(); - // Add enabled features + // Add enabled and remove disabled features. 
for (enabled, feature) in sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() { Some('+') => Some((true, Symbol::intern(&s[1..]))), @@ -360,7 +359,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S #[allow(rustc::potential_query_instability)] features.extend( sess.target - .implied_target_features(std::iter::once(feature.as_str())) + .implied_target_features(feature.as_str()) .iter() .map(|s| Symbol::intern(s)), ); @@ -371,11 +370,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S // `features.contains` below. #[allow(rustc::potential_query_instability)] features.retain(|f| { - if sess - .target - .implied_target_features(std::iter::once(f.as_str())) - .contains(&feature.as_str()) - { + if sess.target.implied_target_features(f.as_str()).contains(&feature.as_str()) { // If `f` if implies `feature`, then `!feature` implies `!f`, so we have to // remove `f`. (This is the standard logical contraposition principle.) false @@ -387,25 +382,31 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S } } - // Filter enabled features based on feature gates - sess.target - .rust_target_features() - .iter() - .filter_map(|(feature, gate, _)| { - // The `allow_unstable` set is used by rustc internally to determined which target - // features are truly available, so we want to return even perma-unstable "forbidden" - // features. - if allow_unstable - || (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none())) - { - Some(*feature) - } else { - None - } - }) - .filter(|feature| features.contains(&Symbol::intern(feature))) - .map(|feature| Symbol::intern(feature)) - .collect() + // Filter enabled features based on feature gates. 
+ let f = |allow_unstable| { + sess.target + .rust_target_features() + .iter() + .filter_map(|(feature, gate, _)| { + // The `allow_unstable` set is used by rustc internally to determined which target + // features are truly available, so we want to return even perma-unstable + // "forbidden" features. + if allow_unstable + || (gate.in_cfg() + && (sess.is_nightly_build() || gate.requires_nightly().is_none())) + { + Some(Symbol::intern(feature)) + } else { + None + } + }) + .filter(|feature| features.contains(&feature)) + .collect() + }; + + let target_features = f(false); + let unstable_target_features = f(true); + (target_features, unstable_target_features) } pub(crate) fn print_version() { @@ -682,7 +683,7 @@ pub(crate) fn global_llvm_features( for feature in sess.opts.cg.target_feature.split(',') { if let Some(feature) = feature.strip_prefix('+') { all_rust_features.extend( - UnordSet::from(sess.target.implied_target_features(std::iter::once(feature))) + UnordSet::from(sess.target.implied_target_features(feature)) .to_sorted_stable_ord() .iter() .map(|&&s| (true, s)), diff --git a/compiler/rustc_codegen_ssa/src/target_features.rs b/compiler/rustc_codegen_ssa/src/target_features.rs index 95a5e96fe46..8058cd1b178 100644 --- a/compiler/rustc_codegen_ssa/src/target_features.rs +++ b/compiler/rustc_codegen_ssa/src/target_features.rs @@ -190,7 +190,7 @@ pub(crate) fn provide(providers: &mut Providers) { }, implied_target_features: |tcx, feature: Symbol| { let feature = feature.as_str(); - UnordSet::from(tcx.sess.target.implied_target_features(std::iter::once(feature))) + UnordSet::from(tcx.sess.target.implied_target_features(feature)) .into_sorted_stable_ord() .into_iter() .map(|s| Symbol::intern(s)) diff --git a/compiler/rustc_codegen_ssa/src/traits/backend.rs b/compiler/rustc_codegen_ssa/src/traits/backend.rs index ebcf118b903..65fd843e7a5 100644 --- a/compiler/rustc_codegen_ssa/src/traits/backend.rs +++ b/compiler/rustc_codegen_ssa/src/traits/backend.rs @@ -45,10 
+45,13 @@ pub trait CodegenBackend { fn print(&self, _req: &PrintRequest, _out: &mut String, _sess: &Session) {} - /// Returns the features that should be set in `cfg(target_features)`. + /// Returns two feature sets: + /// - The first has the features that should be set in `cfg(target_features)`. + /// - The second is like the first, but also includes unstable features. + /// /// RUSTC_SPECIFIC_FEATURES should be skipped here, those are handled outside codegen. - fn target_features_cfg(&self, _sess: &Session, _allow_unstable: bool) -> Vec<Symbol> { - vec![] + fn target_features_cfg(&self, _sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) { + (vec![], vec![]) } fn print_passes(&self) {} diff --git a/compiler/rustc_interface/src/util.rs b/compiler/rustc_interface/src/util.rs index bc2aae7cd87..5cccab893bb 100644 --- a/compiler/rustc_interface/src/util.rs +++ b/compiler/rustc_interface/src/util.rs @@ -39,11 +39,11 @@ pub(crate) fn add_configuration( ) { let tf = sym::target_feature; - let unstable_target_features = codegen_backend.target_features_cfg(sess, true); - sess.unstable_target_features.extend(unstable_target_features.iter().cloned()); + let (target_features, unstable_target_features) = codegen_backend.target_features_cfg(sess); - let target_features = codegen_backend.target_features_cfg(sess, false); - sess.target_features.extend(target_features.iter().cloned()); + sess.unstable_target_features.extend(unstable_target_features.iter().copied()); + + sess.target_features.extend(target_features.iter().copied()); cfg.extend(target_features.into_iter().map(|feat| (tf, Some(feat)))); diff --git a/compiler/rustc_target/src/target_features.rs b/compiler/rustc_target/src/target_features.rs index d05466bb484..6d3b6608ea2 100644 --- a/compiler/rustc_target/src/target_features.rs +++ b/compiler/rustc_target/src/target_features.rs @@ -768,17 +768,15 @@ impl Target { } } - pub fn implied_target_features<'a>( - &self, - base_features: impl Iterator<Item = &'a str>, - ) -> 
FxHashSet<&'a str> { + // Note: the returned set includes `base_feature`. + pub fn implied_target_features<'a>(&self, base_feature: &'a str) -> FxHashSet<&'a str> { let implied_features = self.rust_target_features().iter().map(|(f, _, i)| (f, i)).collect::<FxHashMap<_, _>>(); - // implied target features have their own implied target features, so we traverse the - // map until there are no more features to add + // Implied target features have their own implied target features, so we traverse the + // map until there are no more features to add. let mut features = FxHashSet::default(); - let mut new_features = base_features.collect::<Vec<&str>>(); + let mut new_features = vec![base_feature]; while let Some(new_feature) = new_features.pop() { if features.insert(new_feature) { if let Some(implied_features) = implied_features.get(&new_feature) { |
