about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2018-10-11 19:38:15 +0000
committer bors <bors@rust-lang.org> 2018-10-11 19:38:15 +0000
commit 77af314083e5acabf9ba5335e47271f35eef2e99 (patch)
tree 09cf4997d1c6c33b73665a1ead76c6eb5dcf5ae5
parent b8b4150c042b06c46e29a9d12101f91fe13996e0 (diff)
parent 6009da079419c9693fe4965ecacbd473c2553173 (diff)
download rust-77af314083e5acabf9ba5335e47271f35eef2e99.tar.gz
rust-77af314083e5acabf9ba5335e47271f35eef2e99.zip
Auto merge of #54592 - GabrielMajeri:no-plt, r=nagisa
Support for disabling PLT for better function call performance

This PR gives `rustc` the ability to skip the PLT when generating function calls into shared libraries. This can improve performance by reducing branch indirection.

As far as I know, the only advantage of using the PLT is that it allows ELF lazy binding. However, since Rust already [enables full RELRO for security](https://github.com/rust-lang/rust/pull/43170), lazy binding is already disabled anyway.

This is a little-known feature that is supported by [GCC](https://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html) and [Clang](https://clang.llvm.org/docs/ClangCommandLineReference.html#cmdoption-clang-fplt) as `-fno-plt` (some Linux distros [enable it by default](https://git.archlinux.org/svntogit/packages.git/tree/trunk/makepkg.conf?h=packages/pacman#n40) for all builds).

Implementation inspired by [this patch](https://reviews.llvm.org/D39079#change-YvkpNDlMs_LT) which adds `-fno-plt` support to Clang.

## Performance

I didn't run a lot of benchmarks, but these are the results on my machine for a `clap` [benchmark](https://github.com/clap-rs/clap/blob/master/benches/05_ripgrep.rs):

```
 name              control ns/iter  no-plt ns/iter  diff ns/iter  diff %  speedup
 build_app_long    11,097           10,733                  -364  -3.28%   x 1.03
 build_app_short   11,089           10,742                  -347  -3.13%   x 1.03
 build_help_long   186,835          182,713               -4,122  -2.21%   x 1.02
 build_help_short  80,949           78,455                -2,494  -3.08%   x 1.03
 parse_clean       12,385           12,044                  -341  -2.75%   x 1.03
 parse_complex     19,438           19,017                  -421  -2.17%   x 1.02
 parse_lots        431,493          421,421              -10,072  -2.33%   x 1.02
```

A small performance improvement across the board, with no downsides. It's likely that binaries which make a lot of function calls into dynamic libraries could see even larger improvements. [This comment](https://patchwork.ozlabs.org/patch/468993/#1028255) suggests that, in some cases, `-fno-plt` could improve PIC/PIE code performance by 10%.

## Security benefits

**Bonus**: some speculative execution attacks rely on the PLT; by disabling it, we reduce a large attack surface and lessen the need for [`retpoline`](https://reviews.llvm.org/D41723).

## Remaining PLT calls

The compiled binaries still have plenty of PLT calls, coming from C/C++ libraries. Building dependencies with `CFLAGS=-fno-plt CXXFLAGS=-fno-plt` removes them.
-rw-r--r-- src/librustc/session/config.rs 4
-rw-r--r-- src/librustc/session/mod.rs 24
-rw-r--r-- src/librustc_codegen_llvm/attributes.rs 9
-rw-r--r-- src/librustc_codegen_llvm/context.rs 7
-rw-r--r-- src/librustc_codegen_llvm/declare.rs 2
-rw-r--r-- src/librustc_codegen_llvm/llvm/ffi.rs 1
-rw-r--r-- src/librustc_target/spec/mod.rs 6
-rw-r--r-- src/librustc_target/spec/x86_64_unknown_linux_gnux32.rs 3
-rw-r--r-- src/rustllvm/RustWrapper.cpp 2
-rw-r--r-- src/rustllvm/rustllvm.h 1
-rw-r--r-- src/test/codegen/naked-functions.rs 10
-rw-r--r-- src/test/codegen/no-plt.rs 27
12 files changed, 89 insertions, 7 deletions
diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs
index c532b5ee56f..d8c36f81da3 100644
--- a/src/librustc/session/config.rs
+++ b/src/librustc/session/config.rs
@@ -1387,6 +1387,10 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
           "output a json file with profiler results"),
     emit_stack_sizes: bool = (false, parse_bool, [UNTRACKED],
           "emits a section containing stack size metadata"),
+    plt: Option<bool> = (None, parse_opt_bool, [TRACKED],
+          "whether to use the PLT when calling into shared libraries;
+          only has effect for PIC code on systems with ELF binaries
+          (default: PLT is disabled if full relro is enabled)"),
 }
 
 pub fn default_lib_output() -> CrateType {
diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs
index 3c209a43246..10a506da4ea 100644
--- a/src/librustc/session/mod.rs
+++ b/src/librustc/session/mod.rs
@@ -40,8 +40,7 @@ use syntax::parse::{self, ParseSess};
 use syntax_pos::{MultiSpan, Span};
 use util::profiling::SelfProfiler;
 
-use rustc_target::spec::PanicStrategy;
-use rustc_target::spec::{Target, TargetTriple};
+use rustc_target::spec::{PanicStrategy, RelroLevel, Target, TargetTriple};
 use rustc_data_structures::flock;
 use jobserver::Client;
 
@@ -984,6 +983,27 @@ impl Session {
     pub fn edition(&self) -> Edition {
         self.opts.edition
     }
+
+    /// True if we cannot skip the PLT for shared library calls.
+    pub fn needs_plt(&self) -> bool {
+        // Check if the current target usually needs PLT to be enabled.
+        // The user can use the command line flag to override it.
+        let needs_plt = self.target.target.options.needs_plt;
+
+        let dbg_opts = &self.opts.debugging_opts;
+
+        let relro_level = dbg_opts.relro_level
+            .unwrap_or(self.target.target.options.relro_level);
+
+        // Only enable this optimization by default if full relro is also enabled.
+        // In this case, lazy binding was already unavailable, so nothing is lost.
+        // This also ensures `-Wl,-z,now` is supported by the linker.
+        let full_relro = RelroLevel::Full == relro_level;
+
+        // If the user didn't explicitly force us to use / skip the PLT,
+        // then try to skip it where possible.
+        dbg_opts.plt.unwrap_or(needs_plt || !full_relro)
+    }
 }
 
 pub fn build_session(
diff --git a/src/librustc_codegen_llvm/attributes.rs b/src/librustc_codegen_llvm/attributes.rs
index 0abc26d6cfb..90ba103ca4c 100644
--- a/src/librustc_codegen_llvm/attributes.rs
+++ b/src/librustc_codegen_llvm/attributes.rs
@@ -137,6 +137,15 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
             target_cpu.as_c_str());
 }
 
+/// Sets the `NonLazyBind` LLVM attribute on a given function,
+/// assuming the codegen options allow skipping the PLT.
+pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
+    // Don't generate calls through PLT if it's not necessary
+    if !sess.needs_plt() {
+        Attribute::NonLazyBind.apply_llfn(Function, llfn);
+    }
+}
+
 /// Composite function which sets LLVM attributes for function depending on its AST (#[attribute])
 /// attributes.
 pub fn from_fn_attrs(
diff --git a/src/librustc_codegen_llvm/context.rs b/src/librustc_codegen_llvm/context.rs
index 208649c143a..578018c7adc 100644
--- a/src/librustc_codegen_llvm/context.rs
+++ b/src/librustc_codegen_llvm/context.rs
@@ -208,6 +208,13 @@ pub unsafe fn create_module(
         llvm::LLVMRustSetModulePIELevel(llmod);
     }
 
+    // If skipping the PLT is enabled, we need to add some module metadata
+    // to ensure intrinsic calls don't use it.
+    if !sess.needs_plt() {
+        let avoid_plt = "RtLibUseGOT\0".as_ptr() as *const _;
+        llvm::LLVMRustAddModuleFlag(llmod, avoid_plt, 1);
+    }
+
     llmod
 }
 
diff --git a/src/librustc_codegen_llvm/declare.rs b/src/librustc_codegen_llvm/declare.rs
index 7141c9ece89..26969e24f08 100644
--- a/src/librustc_codegen_llvm/declare.rs
+++ b/src/librustc_codegen_llvm/declare.rs
@@ -104,6 +104,8 @@ fn declare_raw_fn(
         attributes::unwind(llfn, false);
     }
 
+    attributes::non_lazy_bind(cx.sess(), llfn);
+
     llfn
 }
 
diff --git a/src/librustc_codegen_llvm/llvm/ffi.rs b/src/librustc_codegen_llvm/llvm/ffi.rs
index 8485db4210c..c9f51efdc50 100644
--- a/src/librustc_codegen_llvm/llvm/ffi.rs
+++ b/src/librustc_codegen_llvm/llvm/ffi.rs
@@ -122,6 +122,7 @@ pub enum Attribute {
     SanitizeThread  = 20,
     SanitizeAddress = 21,
     SanitizeMemory  = 22,
+    NonLazyBind     = 23,
 }
 
 /// LLVMIntPredicate
diff --git a/src/librustc_target/spec/mod.rs b/src/librustc_target/spec/mod.rs
index 3f1e8ee5528..9c0f945326d 100644
--- a/src/librustc_target/spec/mod.rs
+++ b/src/librustc_target/spec/mod.rs
@@ -576,6 +576,9 @@ pub struct TargetOptions {
     /// the functions in the executable are not randomized and can be used
     /// during an exploit of a vulnerability in any code.
     pub position_independent_executables: bool,
+    /// Determines if the target always requires using the PLT for indirect
+    /// library calls or not. This controls the default value of the `-Z plt` flag.
+    pub needs_plt: bool,
     /// Either partial, full, or off. Full RELRO makes the dynamic linker
     /// resolve all symbols at startup and marks the GOT read-only before
     /// starting the program, preventing overwriting the GOT.
@@ -720,6 +723,7 @@ impl Default for TargetOptions {
             has_rpath: false,
             no_default_libraries: true,
             position_independent_executables: false,
+            needs_plt: false,
             relro_level: RelroLevel::None,
             pre_link_objects_exe: Vec::new(),
             pre_link_objects_exe_crt: Vec::new(),
@@ -1009,6 +1013,7 @@ impl Target {
         key!(has_rpath, bool);
         key!(no_default_libraries, bool);
         key!(position_independent_executables, bool);
+        key!(needs_plt, bool);
         try!(key!(relro_level, RelroLevel));
         key!(archive_format);
         key!(allow_asm, bool);
@@ -1217,6 +1222,7 @@ impl ToJson for Target {
         target_option_val!(has_rpath);
         target_option_val!(no_default_libraries);
         target_option_val!(position_independent_executables);
+        target_option_val!(needs_plt);
         target_option_val!(relro_level);
         target_option_val!(archive_format);
         target_option_val!(allow_asm);
diff --git a/src/librustc_target/spec/x86_64_unknown_linux_gnux32.rs b/src/librustc_target/spec/x86_64_unknown_linux_gnux32.rs
index 72b5bd27c7d..fd61067ba51 100644
--- a/src/librustc_target/spec/x86_64_unknown_linux_gnux32.rs
+++ b/src/librustc_target/spec/x86_64_unknown_linux_gnux32.rs
@@ -17,6 +17,9 @@ pub fn target() -> TargetResult {
     base.pre_link_args.get_mut(&LinkerFlavor::Gcc).unwrap().push("-mx32".to_string());
     base.stack_probes = true;
     base.has_elf_tls = false;
+    // BUG(GabrielMajeri): disabling the PLT on x86_64 Linux with x32 ABI
+    // breaks code gen. See LLVM bug 36743
+    base.needs_plt = true;
 
     Ok(Target {
         llvm_target: "x86_64-unknown-linux-gnux32".to_string(),
diff --git a/src/rustllvm/RustWrapper.cpp b/src/rustllvm/RustWrapper.cpp
index f1ab1d4ddfa..2b1bf1c0290 100644
--- a/src/rustllvm/RustWrapper.cpp
+++ b/src/rustllvm/RustWrapper.cpp
@@ -178,6 +178,8 @@ static Attribute::AttrKind fromRust(LLVMRustAttribute Kind) {
     return Attribute::SanitizeAddress;
   case SanitizeMemory:
     return Attribute::SanitizeMemory;
+  case NonLazyBind:
+    return Attribute::NonLazyBind;
   }
   report_fatal_error("bad AttributeKind");
 }
diff --git a/src/rustllvm/rustllvm.h b/src/rustllvm/rustllvm.h
index 1070068b998..b6fa9a2fa95 100644
--- a/src/rustllvm/rustllvm.h
+++ b/src/rustllvm/rustllvm.h
@@ -97,6 +97,7 @@ enum LLVMRustAttribute {
   SanitizeThread = 20,
   SanitizeAddress = 21,
   SanitizeMemory = 22,
+  NonLazyBind = 23,
 };
 
 typedef struct OpaqueRustString *RustStringRef;
diff --git a/src/test/codegen/naked-functions.rs b/src/test/codegen/naked-functions.rs
index aab5f1bfb4f..2cf8ce00bfb 100644
--- a/src/test/codegen/naked-functions.rs
+++ b/src/test/codegen/naked-functions.rs
@@ -15,7 +15,7 @@
 #![crate_type = "lib"]
 #![feature(naked_functions)]
 
-// CHECK: Function Attrs: naked uwtable
+// CHECK: Function Attrs: naked
 // CHECK-NEXT: define void @naked_empty()
 #[no_mangle]
 #[naked]
@@ -24,7 +24,7 @@ pub fn naked_empty() {
     // CHECK-NEXT: ret void
 }
 
-// CHECK: Function Attrs: naked uwtable
+// CHECK: Function Attrs: naked
 #[no_mangle]
 #[naked]
 // CHECK-NEXT: define void @naked_with_args(i{{[0-9]+}})
@@ -35,7 +35,7 @@ pub fn naked_with_args(a: isize) {
     // CHECK: ret void
 }
 
-// CHECK: Function Attrs: naked uwtable
+// CHECK: Function Attrs: naked
 // CHECK-NEXT: define i{{[0-9]+}} @naked_with_return()
 #[no_mangle]
 #[naked]
@@ -45,7 +45,7 @@ pub fn naked_with_return() -> isize {
     0
 }
 
-// CHECK: Function Attrs: naked uwtable
+// CHECK: Function Attrs: naked
 // CHECK-NEXT: define i{{[0-9]+}} @naked_with_args_and_return(i{{[0-9]+}})
 #[no_mangle]
 #[naked]
@@ -57,7 +57,7 @@ pub fn naked_with_args_and_return(a: isize) -> isize {
     a
 }
 
-// CHECK: Function Attrs: naked uwtable
+// CHECK: Function Attrs: naked
 // CHECK-NEXT: define void @naked_recursive()
 #[no_mangle]
 #[naked]
diff --git a/src/test/codegen/no-plt.rs b/src/test/codegen/no-plt.rs
new file mode 100644
index 00000000000..8f302e57902
--- /dev/null
+++ b/src/test/codegen/no-plt.rs
@@ -0,0 +1,27 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// compile-flags: -C relocation-model=pic -Z plt=no
+
+#![crate_type = "lib"]
+
+// We need a function which is normally called through the PLT.
+extern "C" {
+    // CHECK: Function Attrs: nounwind nonlazybind
+    fn getenv(name: *const u8) -> *mut u8;
+}
+
+// Ensure the function gets referenced.
+pub unsafe fn call_through_plt() -> *mut u8 {
+    getenv(b"\0".as_ptr())
+}
+
+// Ensure intrinsics also skip the PLT
+// CHECK: !"RtLibUseGOT"