about summary refs log tree commit diff
path: root/compiler/rustc_codegen_llvm/src/llvm_util.rs
diff options
context:
space:
mode:
authorSimonas Kazlauskas <git@kazlauskas.me>2021-03-13 15:29:39 +0200
committerSimonas Kazlauskas <git@kazlauskas.me>2021-03-16 21:32:55 +0200
commit72fb4379d56b93b9bd0149649a74fb4b5465ec18 (patch)
tree6b5fe3a1dbdbc9c56fbcf342eea1393da39b372b /compiler/rustc_codegen_llvm/src/llvm_util.rs
parent0517acd54353869b2fbfc50af61ea7bd1fd309e0 (diff)
downloadrust-72fb4379d56b93b9bd0149649a74fb4b5465ec18.tar.gz
rust-72fb4379d56b93b9bd0149649a74fb4b5465ec18.zip
Adjust `-Ctarget-cpu=native` handling in cg_llvm
When cg_llvm encounters the `-Ctarget-cpu=native` it computes an
explciit set of features that applies to the target in order to
correctly compile code for the host CPU (because e.g. `skylake` alone is
not sufficient to tell if some of the instructions are available or
not).

However there were a couple of issues with how we did this. Firstly, the
order in which features were overriden wasn't quite right – conceptually
you'd expect `-Ctarget-cpu=native` option to override the features that
are implicitly set by the target definition. However due to how other
`-Ctarget-cpu` values are handled we must adopt the following order
of priority:

* Features from -Ctarget-cpu=*; are overriden by
* Features implied by --target; are overriden by
* Features from -Ctarget-feature; are overriden by
* function specific features.

Another problem was in that the function level `target-features`
attribute would overwrite the entire set of the globally enabled
features, rather than just the features the
`#[target_feature(enable/disable)]` specified. With something like
`-Ctarget-cpu=native` we'd end up in a situation wherein a function
without `#[target_feature(enable)]` annotation would have a broader
set of features compared to a function with one such attribute. This
turned out to be a cause of heavy run-time regressions in some code
using these function-level attributes in conjunction with
`-Ctarget-cpu=native`, for example.

With this PR rustc is more careful about specifying the entire set of
features for functions that use `#[target_feature(enable/disable)]` or
`#[instruction_set]` attributes.

Sadly testing the original reproducer for this behaviour is quite
impossible – we cannot rely on `-Ctarget-cpu=native` to be anything in
particular on developer or CI machines.
Diffstat (limited to 'compiler/rustc_codegen_llvm/src/llvm_util.rs')
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm_util.rs66
1 files changed, 56 insertions, 10 deletions
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 544ef38c12c..c7dff41955e 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -218,13 +218,39 @@ pub fn target_cpu(sess: &Session) -> &str {
     handle_native(name)
 }
 
-pub fn handle_native_features(sess: &Session) -> Vec<String> {
+/// The list of LLVM features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`,
+/// `--target` and similar).
+// FIXME(nagisa): Cache the output of this somehow? Maybe make this a query? We're calling this
+// for every function that has `#[target_feature]` on it. The global features won't change between
+// the functions; only crates, maybe…
+pub fn llvm_global_features(sess: &Session) -> Vec<String> {
+    // FIXME(nagisa): this should definitely be available more centrally and to other codegen backends.
+    /// These features control behaviour of rustc rather than llvm.
+    const RUSTC_SPECIFIC_FEATURES: &[&str] = &["crt-static"];
+
+    // Features that come earlier are overriden by conflicting features later in the string.
+    // Typically we'll want more explicit settings to override the implicit ones, so:
+    //
+    // * Features from -Ctarget-cpu=*; are overriden by [^1]
+    // * Features implied by --target; are overriden by
+    // * Features from -Ctarget-feature; are overriden by
+    // * function specific features.
+    //
+    // [^1]: target-cpu=native is handled here, other target-cpu values are handled implicitly
+    // through LLVM TargetMachine implementation.
+    //
+    // FIXME(nagisa): it isn't clear what's the best interaction between features implied by
+    // `-Ctarget-cpu` and `--target` are. On one hand, you'd expect CLI arguments to always
+    // override anything that's implicit, so e.g. when there's no `--target` flag, features implied
+    // the host target are overriden by `-Ctarget-cpu=*`. On the other hand, what about when both
+    // `--target` and `-Ctarget-cpu=*` are specified? Both then imply some target features and both
+    // flags are specified by the user on the CLI. It isn't as clear-cut which order of precedence
+    // should be taken in cases like these.
+    let mut features = vec![];
+
+    // -Ctarget-cpu=native
     match sess.opts.cg.target_cpu {
-        Some(ref s) => {
-            if s != "native" {
-                return vec![];
-            }
-
+        Some(ref s) if s == "native" => {
             let features_string = unsafe {
                 let ptr = llvm::LLVMGetHostCPUFeatures();
                 let features_string = if !ptr.is_null() {
@@ -242,11 +268,31 @@ pub fn handle_native_features(sess: &Session) -> Vec<String> {
 
                 features_string
             };
-
-            features_string.split(",").map(|s| s.to_owned()).collect()
+            features.extend(features_string.split(",").map(String::from));
         }
-        None => vec![],
-    }
+        Some(_) | None => {}
+    };
+
+    // Features implied by an implicit or explicit `--target`.
+    features.extend(
+        sess.target
+            .features
+            .split(',')
+            .filter(|f| !f.is_empty() && !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)))
+            .map(String::from),
+    );
+
+    // -Ctarget-features
+    features.extend(
+        sess.opts
+            .cg
+            .target_feature
+            .split(',')
+            .filter(|f| !f.is_empty() && !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)))
+            .map(String::from),
+    );
+
+    features
 }
 
 pub fn tune_cpu(sess: &Session) -> Option<&str> {