about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAlex Crichton <alex@alexcrichton.com>2018-01-16 15:30:57 -0800
committerAlex Crichton <alex@alexcrichton.com>2018-01-21 20:49:56 -0800
commit66366f96267949cf41a992bec477e04763c42565 (patch)
tree7293dcc37403885a3c06ce0085ba1ea2344318dd
parentff2a7c85757542f454934e00605355bbca7bc196 (diff)
downloadrust-66366f96267949cf41a992bec477e04763c42565.tar.gz
rust-66366f96267949cf41a992bec477e04763c42565.zip
rustc: Lower link args to `@`-files on Windows more
When spawning a linker rustc has historically been known to blow OS limits for
the command line being too large, notably on Windows. This is especially true of
incremental compilation where there can be dozens of object files per
compilation. The compiler currently has logic for detecting a failure to spawn
and instead passing arguments via a file instead, but this failure detection
only triggers if a process actually fails to spawn.

Unfortunately on Windows we've got something else to worry about which is
`cmd.exe`. The compiler may be running a linker through `cmd.exe` where
`cmd.exe` has a limit of 8192 on the command line vs 32k on `CreateProcess`.
Moreso rustc actually succeeds in spawning `cmd.exe` today, it's just that after
it's running `cmd.exe` fails to spawn its child, which rustc doesn't currently
detect.

Consequently this commit updates the logic for the spawning the linker on
Windows to instead have a heuristic to see if we need to pass arguments via a
file. This heuristic is an overly pessimistic and "inaccurate" calculation which
just calls `len` on a bunch of `OsString` instances (where `len` is not
precisely the length in u16 elements). This number, when exceeding the 6k
threshold, will force rustc to always pass arguments through a file.

This strategy should avoid us trying to parse the output on Windows of the
linker to see if it successfully spawned yet failed to actually sub-spawn the
linker. We may just be passing arguments through files a little more commonly
now...

The motivation for this commit was a recent bug in Gecko [1] when beta testing,
notably when incremental compilation was enabled it blew out the limit on
`cmd.exe`. This commit will also fix #46999 as well though as emscripten uses a
bat script as well (and we're blowing the limit there).

[1]: https://bugzilla.mozilla.org/show_bug.cgi?id=1430886

Closes #46999
-rw-r--r--src/librustc_trans/back/command.rs72
-rw-r--r--src/librustc_trans/back/link.rs38
-rw-r--r--src/test/run-make/long-linker-command-lines-cmd-exe/Makefile6
-rw-r--r--src/test/run-make/long-linker-command-lines-cmd-exe/foo.bat1
-rw-r--r--src/test/run-make/long-linker-command-lines-cmd-exe/foo.rs96
5 files changed, 181 insertions, 32 deletions
diff --git a/src/librustc_trans/back/command.rs b/src/librustc_trans/back/command.rs
index f93f317a0a0..3b765a493e0 100644
--- a/src/librustc_trans/back/command.rs
+++ b/src/librustc_trans/back/command.rs
@@ -14,22 +14,34 @@
 use std::ffi::{OsStr, OsString};
 use std::fmt;
 use std::io;
+use std::mem;
 use std::process::{self, Output};
 
+#[derive(Clone)]
 pub struct Command {
-    program: OsString,
+    program: Program,
     args: Vec<OsString>,
     env: Vec<(OsString, OsString)>,
 }
 
+#[derive(Clone)]
+enum Program {
+    Normal(OsString),
+    CmdBatScript(OsString),
+}
+
 impl Command {
     pub fn new<P: AsRef<OsStr>>(program: P) -> Command {
-        Command::_new(program.as_ref())
+        Command::_new(Program::Normal(program.as_ref().to_owned()))
+    }
+
+    pub fn bat_script<P: AsRef<OsStr>>(program: P) -> Command {
+        Command::_new(Program::CmdBatScript(program.as_ref().to_owned()))
     }
 
-    fn _new(program: &OsStr) -> Command {
+    fn _new(program: Program) -> Command {
         Command {
-            program: program.to_owned(),
+            program,
             args: Vec::new(),
             env: Vec::new(),
         }
@@ -82,7 +94,14 @@ impl Command {
     }
 
     pub fn command(&self) -> process::Command {
-        let mut ret = process::Command::new(&self.program);
+        let mut ret = match self.program {
+            Program::Normal(ref p) => process::Command::new(p),
+            Program::CmdBatScript(ref p) => {
+                let mut c = process::Command::new("cmd");
+                c.arg("/c").arg(p);
+                c
+            }
+        };
         ret.args(&self.args);
         ret.envs(self.env.clone());
         return ret
@@ -90,16 +109,45 @@ impl Command {
 
     // extensions
 
-    pub fn get_program(&self) -> &OsStr {
-        &self.program
+    pub fn take_args(&mut self) -> Vec<OsString> {
+        mem::replace(&mut self.args, Vec::new())
     }
 
-    pub fn get_args(&self) -> &[OsString] {
-        &self.args
-    }
+    /// Returns a `true` if we're pretty sure that this'll blow OS spawn limits,
+    /// or `false` if we should attempt to spawn and see what the OS says.
+    pub fn very_likely_to_exceed_some_spawn_limit(&self) -> bool {
+        // We mostly only care about Windows in this method, on Unix the limits
+        // can be gargantuan anyway so we're pretty unlikely to hit them
+        if cfg!(unix) {
+            return false
+        }
 
-    pub fn get_env(&self) -> &[(OsString, OsString)] {
-        &self.env
+        // Ok so on Windows to spawn a process is 32,768 characters in its
+        // command line [1]. Unfortunately we don't actually have access to that
+        // as it's calculated just before spawning. Instead we perform a
+        // poor-man's guess as to how long our command line will be. We're
+        // assuming here that we don't have to escape every character...
+        //
+        // Turns out though that `cmd.exe` has even smaller limits, 8192
+        // characters [2]. Linkers can often be batch scripts (for example
+        // Emscripten, Gecko's current build system) which means that we're
+        // running through batch scripts. These linkers often just forward
+        // arguments elsewhere (and maybe tack on more), so if we blow 8192
+        // bytes we'll typically cause them to blow as well.
+        //
+        // Basically as a result just perform an inflated estimate of what our
+        // command line will look like and test if it's > 8192 (we actually
+        // test against 6k to artificially inflate our estimate). If all else
+        // fails we'll fall back to the normal unix logic of testing the OS
+        // error code if we fail to spawn and automatically re-spawning the
+        // linker with smaller arguments.
+        //
+        // [1]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms682425(v=vs.85).aspx
+        // [2]: https://blogs.msdn.microsoft.com/oldnewthing/20031210-00/?p=41553
+
+        let estimated_command_line_len =
+            self.args.iter().map(|a| a.len()).sum::<usize>();
+        estimated_command_line_len > 1024 * 6
     }
 }
 
diff --git a/src/librustc_trans/back/link.rs b/src/librustc_trans/back/link.rs
index 62b65b7952e..923e5549927 100644
--- a/src/librustc_trans/back/link.rs
+++ b/src/librustc_trans/back/link.rs
@@ -36,8 +36,8 @@ use std::char;
 use std::env;
 use std::ffi::OsString;
 use std::fmt;
-use std::fs::{self, File};
-use std::io::{self, Write, BufWriter};
+use std::fs;
+use std::io;
 use std::path::{Path, PathBuf};
 use std::process::{Output, Stdio};
 use std::str;
@@ -71,9 +71,7 @@ pub fn get_linker(sess: &Session) -> (PathBuf, Command, Vec<(OsString, OsString)
     let cmd = |linker: &Path| {
         if let Some(linker) = linker.to_str() {
             if cfg!(windows) && linker.ends_with(".bat") {
-                let mut cmd = Command::new("cmd");
-                cmd.arg("/c").arg(linker);
-                return cmd
+                return Command::bat_script(linker)
             }
         }
         Command::new(linker)
@@ -758,26 +756,26 @@ fn exec_linker(sess: &Session, cmd: &mut Command, tmpdir: &Path)
     // that contains all the arguments. The theory is that this is then
     // accepted on all linkers and the linker will read all its options out of
     // there instead of looking at the command line.
-    match cmd.command().stdout(Stdio::piped()).stderr(Stdio::piped()).spawn() {
-        Ok(child) => return child.wait_with_output(),
-        Err(ref e) if command_line_too_big(e) => {}
-        Err(e) => return Err(e)
+    if !cmd.very_likely_to_exceed_some_spawn_limit() {
+        match cmd.command().stdout(Stdio::piped()).stderr(Stdio::piped()).spawn() {
+            Ok(child) => return child.wait_with_output(),
+            Err(ref e) if command_line_too_big(e) => {}
+            Err(e) => return Err(e)
+        }
     }
 
-    let file = tmpdir.join("linker-arguments");
-    let mut cmd2 = Command::new(cmd.get_program());
-    cmd2.arg(format!("@{}", file.display()));
-    for &(ref k, ref v) in cmd.get_env() {
-        cmd2.env(k, v);
-    }
-    let mut f = BufWriter::new(File::create(&file)?);
-    for arg in cmd.get_args() {
-        writeln!(f, "{}", Escape {
+    let mut cmd2 = cmd.clone();
+    let mut args = String::new();
+    for arg in cmd2.take_args() {
+        args.push_str(&Escape {
             arg: arg.to_str().unwrap(),
             is_like_msvc: sess.target.target.options.is_like_msvc,
-        })?;
+        }.to_string());
+        args.push_str("\n");
     }
-    f.into_inner()?;
+    let file = tmpdir.join("linker-arguments");
+    fs::write(&file, args.as_bytes())?;
+    cmd2.arg(format!("@{}", file.display()));
     return cmd2.output();
 
     #[cfg(unix)]
diff --git a/src/test/run-make/long-linker-command-lines-cmd-exe/Makefile b/src/test/run-make/long-linker-command-lines-cmd-exe/Makefile
new file mode 100644
index 00000000000..debe9e93824
--- /dev/null
+++ b/src/test/run-make/long-linker-command-lines-cmd-exe/Makefile
@@ -0,0 +1,6 @@
+-include ../tools.mk
+
+all:
+	$(RUSTC) foo.rs -g
+	cp foo.bat $(TMPDIR)/
+	OUT_DIR="$(TMPDIR)" RUSTC="$(RUSTC_ORIGINAL)" $(call RUN,foo)
diff --git a/src/test/run-make/long-linker-command-lines-cmd-exe/foo.bat b/src/test/run-make/long-linker-command-lines-cmd-exe/foo.bat
new file mode 100644
index 00000000000..a9350f12bbb
--- /dev/null
+++ b/src/test/run-make/long-linker-command-lines-cmd-exe/foo.bat
@@ -0,0 +1 @@
+%MY_LINKER% %*
diff --git a/src/test/run-make/long-linker-command-lines-cmd-exe/foo.rs b/src/test/run-make/long-linker-command-lines-cmd-exe/foo.rs
new file mode 100644
index 00000000000..f9168a82e22
--- /dev/null
+++ b/src/test/run-make/long-linker-command-lines-cmd-exe/foo.rs
@@ -0,0 +1,96 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Like the `long-linker-command-lines` test this test attempts to blow
+// a command line limit for running the linker. Unlike that test, however,
+// this test is testing `cmd.exe` specifically rather than the OS.
+//
+// Unfortunately `cmd.exe` has a 8192 limit which is relatively small
+// in the grand scheme of things and anyone sripting rustc's linker
+// is probably using a `*.bat` script and is likely to hit this limit.
+//
+// This test uses a `foo.bat` script as the linker which just simply
+// delegates back to this program. The compiler should use a lower
+// limit for arguments before passing everything via `@`, which
+// means that everything should still succeed here.
+
+use std::env;
+use std::fs::{self, File};
+use std::io::{BufWriter, Write, Read};
+use std::path::PathBuf;
+use std::process::Command;
+
+fn main() {
+    if !cfg!(windows) {
+        return
+    }
+
+    let tmpdir = PathBuf::from(env::var_os("OUT_DIR").unwrap());
+    let ok = tmpdir.join("ok");
+    let not_ok = tmpdir.join("not_ok");
+    if env::var("YOU_ARE_A_LINKER").is_ok() {
+        match env::args().find(|a| a.contains("@")) {
+            Some(file) => { fs::copy(&file[1..], &ok).unwrap(); }
+            None => { File::create(&not_ok).unwrap(); }
+        }
+        return
+    }
+
+    let rustc = env::var_os("RUSTC").unwrap_or("rustc".into());
+    let me = env::current_exe().unwrap();
+    let bat = me.parent()
+        .unwrap()
+        .join("foo.bat");
+    let bat_linker = format!("linker={}", bat.display());
+    for i in (1..).map(|i| i * 10) {
+        println!("attempt: {}", i);
+
+        let file = tmpdir.join("bar.rs");
+        let mut f = BufWriter::new(File::create(&file).unwrap());
+        let mut lib_name = String::new();
+        for _ in 0..i {
+            lib_name.push_str("foo");
+        }
+        for j in 0..i {
+            writeln!(f, "#[link(name = \"{}{}\")]", lib_name, j).unwrap();
+        }
+        writeln!(f, "extern {{}}\nfn main() {{}}").unwrap();
+        f.into_inner().unwrap();
+
+        drop(fs::remove_file(&ok));
+        drop(fs::remove_file(&not_ok));
+        let status = Command::new(&rustc)
+            .arg(&file)
+            .arg("-C").arg(&bat_linker)
+            .arg("--out-dir").arg(&tmpdir)
+            .env("YOU_ARE_A_LINKER", "1")
+            .env("MY_LINKER", &me)
+            .status()
+            .unwrap();
+
+        if !status.success() {
+            panic!("rustc didn't succeed: {}", status);
+        }
+
+        if !ok.exists() {
+            assert!(not_ok.exists());
+            continue
+        }
+
+        let mut contents = String::new();
+        File::open(&ok).unwrap().read_to_string(&mut contents).unwrap();
+
+        for j in 0..i {
+            assert!(contents.contains(&format!("{}{}", lib_name, j)));
+        }
+
+        break
+    }
+}