about summary refs log tree commit diff
diff options
context:
space:
mode:
authorSimonas Kazlauskas <git@kazlauskas.me>2021-03-13 15:29:39 +0200
committerSimonas Kazlauskas <git@kazlauskas.me>2021-03-16 21:32:55 +0200
commit72fb4379d56b93b9bd0149649a74fb4b5465ec18 (patch)
tree6b5fe3a1dbdbc9c56fbcf342eea1393da39b372b
parent0517acd54353869b2fbfc50af61ea7bd1fd309e0 (diff)
downloadrust-72fb4379d56b93b9bd0149649a74fb4b5465ec18.tar.gz
rust-72fb4379d56b93b9bd0149649a74fb4b5465ec18.zip
Adjust `-Ctarget-cpu=native` handling in cg_llvm
When cg_llvm encounters the `-Ctarget-cpu=native` it computes an
explciit set of features that applies to the target in order to
correctly compile code for the host CPU (because e.g. `skylake` alone is
not sufficient to tell if some of the instructions are available or
not).

However there were a couple of issues with how we did this. Firstly, the
order in which features were overriden wasn't quite right – conceptually
you'd expect `-Ctarget-cpu=native` option to override the features that
are implicitly set by the target definition. However due to how other
`-Ctarget-cpu` values are handled we must adopt the following order
of priority:

* Features from -Ctarget-cpu=*; are overriden by
* Features implied by --target; are overriden by
* Features from -Ctarget-feature; are overriden by
* function specific features.

Another problem was in that the function level `target-features`
attribute would overwrite the entire set of the globally enabled
features, rather than just the features the
`#[target_feature(enable/disable)]` specified. With something like
`-Ctarget-cpu=native` we'd end up in a situation wherein a function
without `#[target_feature(enable)]` annotation would have a broader
set of features compared to a function with one such attribute. This
turned out to be a cause of heavy run-time regressions in some code
using these function-level attributes in conjunction with
`-Ctarget-cpu=native`, for example.

With this PR rustc is more careful about specifying the entire set of
features for functions that use `#[target_feature(enable/disable)]` or
`#[instruction_set]` attributes.

Sadly testing the original reproducer for this behaviour is quite
impossible – we cannot rely on `-Ctarget-cpu=native` to be anything in
particular on developer or CI machines.
-rw-r--r--compiler/rustc_codegen_llvm/src/attributes.rs30
-rw-r--r--compiler/rustc_codegen_llvm/src/back/write.rs5
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm_util.rs66
-rw-r--r--src/test/assembly/target-feature-multiple.rs41
-rw-r--r--src/test/codegen/target-feature-multiple.rs9
-rw-r--r--src/test/codegen/target-feature-on-functions.rs9
-rw-r--r--src/test/codegen/target-feature-overrides.rs47
7 files changed, 155 insertions, 52 deletions
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index 09ece6164eb..64ebe585dd8 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -152,18 +152,6 @@ fn set_probestack(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
     }
 }
 
-pub fn llvm_target_features(sess: &Session) -> impl Iterator<Item = &str> {
-    const RUSTC_SPECIFIC_FEATURES: &[&str] = &["crt-static"];
-
-    let cmdline = sess
-        .opts
-        .cg
-        .target_feature
-        .split(',')
-        .filter(|f| !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)));
-    sess.target.features.split(',').chain(cmdline).filter(|l| !l.is_empty())
-}
-
 pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
     let target_cpu = SmallCStr::new(llvm_util::target_cpu(cx.tcx.sess));
     llvm::AddFunctionAttrStringValue(
@@ -301,20 +289,22 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
     // The target doesn't care; the subtarget reads our attribute.
     apply_tune_cpu_attr(cx, llfn);
 
-    let features = llvm_target_features(cx.tcx.sess)
-        .map(|s| s.to_string())
-        .chain(codegen_fn_attrs.target_features.iter().map(|f| {
+    let function_features = codegen_fn_attrs
+        .target_features
+        .iter()
+        .map(|f| {
             let feature = &f.as_str();
             format!("+{}", llvm_util::to_llvm_feature(cx.tcx.sess, feature))
-        }))
+        })
         .chain(codegen_fn_attrs.instruction_set.iter().map(|x| match x {
             InstructionSetAttr::ArmA32 => "-thumb-mode".to_string(),
             InstructionSetAttr::ArmT32 => "+thumb-mode".to_string(),
         }))
-        .collect::<Vec<String>>()
-        .join(",");
-
-    if !features.is_empty() {
+        .collect::<Vec<String>>();
+    if !function_features.is_empty() {
+        let mut global_features = llvm_util::llvm_global_features(cx.tcx.sess);
+        global_features.extend(function_features.into_iter());
+        let features = global_features.join(",");
         let val = CString::new(features).unwrap();
         llvm::AddFunctionAttrStringValue(
             llfn,
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index c224da7885b..d33cb67e403 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -1,4 +1,3 @@
-use crate::attributes;
 use crate::back::lto::ThinBuffer;
 use crate::back::profiling::{
     selfprofile_after_pass_callback, selfprofile_before_pass_callback, LlvmSelfProfiler,
@@ -166,8 +165,6 @@ pub fn target_machine_factory(
 
     let code_model = to_llvm_code_model(sess.code_model());
 
-    let mut features = llvm_util::handle_native_features(sess);
-    features.extend(attributes::llvm_target_features(sess).map(|s| s.to_owned()));
     let mut singlethread = sess.target.singlethread;
 
     // On the wasm target once the `atomics` feature is enabled that means that
@@ -182,7 +179,7 @@ pub fn target_machine_factory(
 
     let triple = SmallCStr::new(&sess.target.llvm_target);
     let cpu = SmallCStr::new(llvm_util::target_cpu(sess));
-    let features = features.join(",");
+    let features = llvm_util::llvm_global_features(sess).join(",");
     let features = CString::new(features).unwrap();
     let abi = SmallCStr::new(&sess.target.llvm_abiname);
     let trap_unreachable =
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index 544ef38c12c..c7dff41955e 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -218,13 +218,39 @@ pub fn target_cpu(sess: &Session) -> &str {
     handle_native(name)
 }
 
-pub fn handle_native_features(sess: &Session) -> Vec<String> {
+/// The list of LLVM features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`,
+/// `--target` and similar).
+// FIXME(nagisa): Cache the output of this somehow? Maybe make this a query? We're calling this
+// for every function that has `#[target_feature]` on it. The global features won't change between
+// the functions; only crates, maybe…
+pub fn llvm_global_features(sess: &Session) -> Vec<String> {
+    // FIXME(nagisa): this should definitely be available more centrally and to other codegen backends.
+    /// These features control behaviour of rustc rather than llvm.
+    const RUSTC_SPECIFIC_FEATURES: &[&str] = &["crt-static"];
+
+    // Features that come earlier are overriden by conflicting features later in the string.
+    // Typically we'll want more explicit settings to override the implicit ones, so:
+    //
+    // * Features from -Ctarget-cpu=*; are overriden by [^1]
+    // * Features implied by --target; are overriden by
+    // * Features from -Ctarget-feature; are overriden by
+    // * function specific features.
+    //
+    // [^1]: target-cpu=native is handled here, other target-cpu values are handled implicitly
+    // through LLVM TargetMachine implementation.
+    //
+    // FIXME(nagisa): it isn't clear what's the best interaction between features implied by
+    // `-Ctarget-cpu` and `--target` are. On one hand, you'd expect CLI arguments to always
+    // override anything that's implicit, so e.g. when there's no `--target` flag, features implied
+    // the host target are overriden by `-Ctarget-cpu=*`. On the other hand, what about when both
+    // `--target` and `-Ctarget-cpu=*` are specified? Both then imply some target features and both
+    // flags are specified by the user on the CLI. It isn't as clear-cut which order of precedence
+    // should be taken in cases like these.
+    let mut features = vec![];
+
+    // -Ctarget-cpu=native
     match sess.opts.cg.target_cpu {
-        Some(ref s) => {
-            if s != "native" {
-                return vec![];
-            }
-
+        Some(ref s) if s == "native" => {
             let features_string = unsafe {
                 let ptr = llvm::LLVMGetHostCPUFeatures();
                 let features_string = if !ptr.is_null() {
@@ -242,11 +268,31 @@ pub fn handle_native_features(sess: &Session) -> Vec<String> {
 
                 features_string
             };
-
-            features_string.split(",").map(|s| s.to_owned()).collect()
+            features.extend(features_string.split(",").map(String::from));
         }
-        None => vec![],
-    }
+        Some(_) | None => {}
+    };
+
+    // Features implied by an implicit or explicit `--target`.
+    features.extend(
+        sess.target
+            .features
+            .split(',')
+            .filter(|f| !f.is_empty() && !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)))
+            .map(String::from),
+    );
+
+    // -Ctarget-features
+    features.extend(
+        sess.opts
+            .cg
+            .target_feature
+            .split(',')
+            .filter(|f| !f.is_empty() && !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)))
+            .map(String::from),
+    );
+
+    features
 }
 
 pub fn tune_cpu(sess: &Session) -> Option<&str> {
diff --git a/src/test/assembly/target-feature-multiple.rs b/src/test/assembly/target-feature-multiple.rs
new file mode 100644
index 00000000000..4c2073678b8
--- /dev/null
+++ b/src/test/assembly/target-feature-multiple.rs
@@ -0,0 +1,41 @@
+// assembly-output: emit-asm
+// needs-llvm-components: x86
+// revisions: TWOFLAGS SINGLEFLAG
+// compile-flags: --target=x86_64-unknown-linux-gnu
+// [TWOFLAGS] compile-flags: -C target-feature=+rdrnd -C target-feature=+rdseed
+// [SINGLEFLAG] compile-flags: -C target-feature=+rdrnd,+rdseed
+
+// Target features set via flags aren't necessarily reflected in the IR, so the only way to test
+// them is to build code that requires the features to be enabled to work.
+//
+// In this particular test if `rdrnd,rdseed` somehow didn't make it to LLVM, the instruction
+// selection should crash.
+//
+// > LLVM ERROR: Cannot select: 0x7f00f400c010: i32,i32,ch = X86ISD::RDSEED 0x7f00f400bfa8:2
+// > In function: foo
+//
+// See also src/test/codegen/target-feature-overrides.rs
+#![feature(no_core, lang_items, link_llvm_intrinsics, abi_unadjusted)]
+#![crate_type = "lib"]
+#![no_core]
+
+#[lang = "sized"]
+trait Sized {}
+#[lang = "copy"]
+trait Copy {}
+
+// Use of these requires target features to be enabled
+extern "unadjusted" {
+    #[link_name = "llvm.x86.rdrand.32"]
+    fn x86_rdrand32_step() -> (u32, i32);
+    #[link_name = "llvm.x86.rdseed.32"]
+    fn x86_rdseed32_step() -> (u32, i32);
+}
+
+#[no_mangle]
+pub unsafe fn foo() -> (u32, u32) {
+    // CHECK-LABEL: foo:
+    // CHECK: rdrand
+    // CHECK: rdseed
+    (x86_rdrand32_step().0, x86_rdseed32_step().0)
+}
diff --git a/src/test/codegen/target-feature-multiple.rs b/src/test/codegen/target-feature-multiple.rs
deleted file mode 100644
index f71a9c3c582..00000000000
--- a/src/test/codegen/target-feature-multiple.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-// only-x86_64
-// compile-flags: -C target-feature=+sse2,-avx,+avx2 -C target-feature=+avx,-avx2
-
-#![crate_type = "lib"]
-
-#[no_mangle]
-pub fn foo() {
-    // CHECK: attributes #0 = { {{.*}}"target-features"="+sse2,-avx,+avx2,+avx,-avx2"{{.*}} }
-}
diff --git a/src/test/codegen/target-feature-on-functions.rs b/src/test/codegen/target-feature-on-functions.rs
deleted file mode 100644
index d4d39d08b1e..00000000000
--- a/src/test/codegen/target-feature-on-functions.rs
+++ /dev/null
@@ -1,9 +0,0 @@
-// only-x86_64
-// compile-flags: -C target-feature=+avx
-
-#![crate_type = "lib"]
-
-#[no_mangle]
-pub fn foo() {
-    // CHECK: attributes #0 = { {{.*}}"target-features"="+avx"{{.*}} }
-}
diff --git a/src/test/codegen/target-feature-overrides.rs b/src/test/codegen/target-feature-overrides.rs
new file mode 100644
index 00000000000..2c19cfd8c22
--- /dev/null
+++ b/src/test/codegen/target-feature-overrides.rs
@@ -0,0 +1,47 @@
+// revisions: COMPAT INCOMPAT
+// needs-llvm-components: x86
+// compile-flags: --target=x86_64-unknown-linux-gnu -Copt-level=3
+// [COMPAT] compile-flags: -Ctarget-feature=+avx2,+avx
+// [INCOMPAT] compile-flags: -Ctarget-feature=-avx2,-avx
+
+// See also src/test/assembly/target-feature-multiple.rs
+#![feature(no_core, lang_items)]
+#![crate_type = "lib"]
+#![no_core]
+
+
+#[lang = "sized"]
+trait Sized {}
+#[lang = "copy"]
+trait Copy {}
+
+extern "C" {
+    fn peach() -> u32;
+}
+
+#[inline]
+#[target_feature(enable = "avx")]
+#[no_mangle]
+pub unsafe fn apple() -> u32 {
+// CHECK-LABEL: @apple()
+// CHECK-SAME: [[APPLEATTRS:#[0-9]+]] {
+// CHECK: {{.*}}call{{.*}}@peach
+    peach()
+}
+
+// target features same as global (not reflected or overriden in IR)
+#[no_mangle]
+pub unsafe fn banana() -> u32 {
+// CHECK-LABEL: @banana()
+// CHECK-SAME: [[BANANAATTRS:#[0-9]+]] {
+// COMPAT: {{.*}}call{{.*}}@peach
+// INCOMPAT: {{.*}}call{{.*}}@apple
+    apple() // Compatible for inline in COMPAT revision and can't be inlined in INCOMPAT
+}
+
+// CHECK: attributes [[APPLEATTRS]]
+// COMPAT-SAME: "target-features"="+avx2,+avx,+avx"
+// INCOMPAT-SAME: "target-features"="-avx2,-avx,+avx"
+// CHECK: attributes [[BANANAATTRS]]
+// CHECK-NOT: target-features
+// CHECK-SAME: }