about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMingye Wang <arthur200126@gmail.com>2020-09-17 17:39:26 +0800
committerMingye Wang <arthur2e5@aosc.io>2020-10-05 07:50:44 +0800
commita35a93f09cc111a53d00efc567ad678583dd5ac7 (patch)
tree943360a4a63ea2f050f5e53e691afb5c8a11d2bc
parent285fc7d704fcdd7b2a37d475d04d5d955490e000 (diff)
downloadrust-a35a93f09cc111a53d00efc567ad678583dd5ac7.tar.gz
rust-a35a93f09cc111a53d00efc567ad678583dd5ac7.zip
Pass tune-cpu to LLVM
I think this is how it should work...
-rw-r--r--compiler/rustc_codegen_llvm/src/attributes.rs15
-rw-r--r--compiler/rustc_codegen_llvm/src/context.rs3
-rw-r--r--compiler/rustc_codegen_llvm/src/lib.rs3
-rw-r--r--compiler/rustc_codegen_llvm/src/llvm_util.rs22
-rw-r--r--compiler/rustc_codegen_ssa/src/traits/backend.rs1
-rw-r--r--compiler/rustc_interface/src/tests.rs1
-rw-r--r--compiler/rustc_session/src/options.rs2
-rw-r--r--src/doc/rustc/src/codegen-options/index.md20
-rw-r--r--src/test/codegen/tune-cpu-on-functions.rs21
9 files changed, 80 insertions, 8 deletions
diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
index 227a87ff819..434a42607d6 100644
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -194,6 +194,18 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
     );
 }
 
+pub fn apply_tune_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
+    if let Some(tune) = llvm_util::tune_cpu(cx.tcx.sess) {
+        let tune_cpu = SmallCStr::new(tune);
+        llvm::AddFunctionAttrStringValue(
+            llfn,
+            llvm::AttributePlace::Function,
+            const_cstr!("tune-cpu"),
+            tune_cpu.as_c_str(),
+        );
+    }
+}
+
 /// Sets the `NonLazyBind` LLVM attribute on a given function,
 /// assuming the codegen options allow skipping the PLT.
 pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
@@ -300,6 +312,9 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
     // Without this, ThinLTO won't inline Rust functions into Clang generated
     // functions (because Clang annotates functions this way too).
     apply_target_cpu_attr(cx, llfn);
+    // tune-cpu is only conveyed through the attribute for our purpose.
+    // The target doesn't care; the subtarget reads our attribute.
+    apply_tune_cpu_attr(cx, llfn);
 
     let features = llvm_target_features(cx.tcx.sess)
         .map(|s| s.to_string())
diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs
index 1c51a9df5d8..6e6b2cacc03 100644
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@@ -417,7 +417,8 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
     }
 
     fn apply_target_cpu_attr(&self, llfn: &'ll Value) {
-        attributes::apply_target_cpu_attr(self, llfn)
+        attributes::apply_target_cpu_attr(self, llfn);
+        attributes::apply_tune_cpu_attr(self, llfn);
     }
 
     fn create_used_variable(&self) {
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index 2e2abe9fb30..28e49e823c0 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -116,6 +116,9 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
     fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str {
         llvm_util::target_cpu(sess)
     }
+    fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
+        llvm_util::tune_cpu(sess)
+    }
 }
 
 impl WriteBackendMethods for LlvmCodegenBackend {
diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs
index f0b50459837..7fe30fc8ac0 100644
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@@ -351,11 +351,7 @@ pub(crate) fn print(req: PrintRequest, sess: &Session) {
     }
 }
 
-pub fn target_cpu(sess: &Session) -> &str {
-    let name = match sess.opts.cg.target_cpu {
-        Some(ref s) => &**s,
-        None => &*sess.target.target.options.cpu,
-    };
+fn handle_native(name: &str) -> &str {
     if name != "native" {
         return name;
     }
@@ -366,3 +362,19 @@ pub fn target_cpu(sess: &Session) -> &str {
         str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap()
     }
 }
+
+pub fn target_cpu(sess: &Session) -> &str {
+    let name = match sess.opts.cg.target_cpu {
+        Some(ref s) => &**s,
+        None => &*sess.target.target.options.cpu,
+    };
+
+    handle_native(name)
+}
+
+pub fn tune_cpu(sess: &Session) -> Option<&str> {
+    match sess.opts.debugging_opts.tune_cpu {
+        Some(ref s) => Some(handle_native(&**s)),
+        None => None,
+    }
+}
diff --git a/compiler/rustc_codegen_ssa/src/traits/backend.rs b/compiler/rustc_codegen_ssa/src/traits/backend.rs
index 3522ea01153..98cd5d82813 100644
--- a/compiler/rustc_codegen_ssa/src/traits/backend.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/backend.rs
@@ -116,4 +116,5 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
         opt_level: config::OptLevel,
     ) -> Arc<dyn Fn() -> Result<Self::TargetMachine, String> + Send + Sync>;
     fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
+    fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;
 }
diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs
index dc4fa807f78..a0f2929c7e3 100644
--- a/compiler/rustc_interface/src/tests.rs
+++ b/compiler/rustc_interface/src/tests.rs
@@ -583,6 +583,7 @@ fn test_debugging_options_tracking_hash() {
     tracked!(symbol_mangling_version, SymbolManglingVersion::V0);
     tracked!(teach, true);
     tracked!(thinlto, Some(true));
+    tracked!(tune_cpu, Some(String::from("abc")));
     tracked!(tls_model, Some(TlsModel::GeneralDynamic));
     tracked!(treat_err_as_bug, Some(1));
     tracked!(unleash_the_miri_inside_of_you, true);
diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs
index 8cc55f4ebe8..611150f64f5 100644
--- a/compiler/rustc_session/src/options.rs
+++ b/compiler/rustc_session/src/options.rs
@@ -1079,6 +1079,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
         "show extended diagnostic help (default: no)"),
     terminal_width: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
         "set the current terminal width"),
+    tune_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
+        "select processor to schedule for (`rustc --print target-cpus` for details)"),
     thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
         "enable ThinLTO when possible"),
     // We default to 1 here since we want to behave like
diff --git a/src/doc/rustc/src/codegen-options/index.md b/src/doc/rustc/src/codegen-options/index.md
index bed10ca16d3..f6493e49c3c 100644
--- a/src/doc/rustc/src/codegen-options/index.md
+++ b/src/doc/rustc/src/codegen-options/index.md
@@ -497,8 +497,10 @@ point instructions in software. It takes one of the following values:
 This instructs `rustc` to generate code specifically for a particular processor.
 
 You can run `rustc --print target-cpus` to see the valid options to pass
-here. Additionally, `native` can be passed to use the processor of the host
-machine. Each target has a default base CPU.
+here. Each target has a default base CPU. Special values include:
+
+* `native` can be passed to use the processor of the host machine. 
+* `generic` refers to an LLVM target with minimal features but modern tuning.
 
 ## target-feature
 
@@ -530,6 +532,20 @@ This also supports the feature `+crt-static` and `-crt-static` to control
 Each target and [`target-cpu`](#target-cpu) has a default set of enabled
 features.
 
+## tune-cpu
+
+This instructs `rustc` to schedule code specifically for a particular
+processor. This does not affect the compatibility (instruction sets or ABI),
+but should make your code slightly more efficient on the selected CPU.
+
+The valid options are the same as those for [`target-cpu`](#target-cpu).
+The default is `None`, which LLVM translates as the `target-cpu`.
+
+This is an unstable option. Use `-Z tune-cpu=machine` to specify a value.
+
+Due to limitations in LLVM (12.0.0-git9218f92), this option is currently
+effective only for x86 targets.
+
 [option-emit]: ../command-line-arguments.md#option-emit
 [option-o-optimize]: ../command-line-arguments.md#option-o-optimize
 [profile-guided optimization]: ../profile-guided-optimization.md
diff --git a/src/test/codegen/tune-cpu-on-functions.rs b/src/test/codegen/tune-cpu-on-functions.rs
new file mode 100644
index 00000000000..9121799cdbf
--- /dev/null
+++ b/src/test/codegen/tune-cpu-on-functions.rs
@@ -0,0 +1,21 @@
+// This test makes sure that functions get annotated with the proper
+// "tune-cpu" attribute in LLVM.
+
+// no-prefer-dynamic
+// ignore-tidy-linelength
+// compile-flags: -C no-prepopulate-passes -C panic=abort -C linker-plugin-lto -Cpasses=name-anon-globals -Z tune-cpu=generic
+
+#![crate_type = "staticlib"]
+
+// CHECK-LABEL: define {{.*}} @exported() {{.*}} #0
+#[no_mangle]
+pub extern fn exported() {
+    not_exported();
+}
+
+// CHECK-LABEL: ; tune_cpu_on_functions::not_exported
+// CHECK-NEXT: ; Function Attrs:
+// CHECK-NEXT: define {{.*}}() {{.*}} #0
+fn not_exported() {}
+
+// CHECK: attributes #0 = {{.*}} "tune-cpu"="{{.*}}"