Auto merge of #47269 - michaelwoerister:mangled-cgu-names, r=alexcrichton

Shorten names of some compiler generated artifacts. This PR makes the compiler mangle codegen unit names by default. The name of every codegen unit name will now be a random string of 16 characters. It also makes the file extensions of some intermediate compiler products shorter. Hopefully, these changes will reduce the pressure on tools with path length restrictions like buildbot. The change should also solve problems with case-insensitive file system. cc #47186 and #47222 r? @alexcrichton
author: bors <bors@rust-lang.org> 2018-01-09 16:04:21 +0000
committer: bors <bors@rust-lang.org> 2018-01-09 16:04:21 +0000
commit: 61452e506f0c88861cccaeea4ced3419bdb3cbe0 (patch)
tree: f6630e34f7e3b44761f02356fe13192b78d68a2f
parent: 2e33c89ff1518359c4bd5fbed1571ea00cb3b146 (diff)
parent: 94f3037f4bc2f18ac1c427e145928cc766b573fe (diff)
download: rust-61452e506f0c88861cccaeea4ced3419bdb3cbe0.tar.gz
rust-61452e506f0c88861cccaeea4ced3419bdb3cbe0.zip
10 files changed, 70 insertions, 28 deletions
diff --git a/src/librustc/mir/mono.rs b/src/librustc/mir/mono.rs
index 5f74f088237..efdf4066815 100644
--- a/src/librustc/mir/mono.rs
+++ b/src/librustc/mir/mono.rs
@@ -12,9 +12,11 @@ use syntax::ast::NodeId;
 use syntax::symbol::InternedString;
 use ty::Instance;
 use util::nodemap::FxHashMap;
+use rustc_data_structures::base_n;
 use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
                                            StableHasher};
 use ich::{Fingerprint, StableHashingContext, NodeIdHashingMode};
+use std::hash::Hash;
 
 #[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)]
 pub enum MonoItem<'tcx> {
@@ -119,6 +121,16 @@ impl<'tcx> CodegenUnit<'tcx> {
     {
         &mut self.items
     }
+
+    pub fn mangle_name(human_readable_name: &str) -> String {
+        // We generate a 80 bit hash from the name. This should be enough to
+        // avoid collisions and is still reasonably short for filenames.
+        let mut hasher = StableHasher::new();
+        human_readable_name.hash(&mut hasher);
+        let hash: u128 = hasher.finish();
+        let hash = hash & ((1u128 << 80) - 1);
+        base_n::encode(hash, base_n::CASE_INSENSITIVE)
+    }
 }
 
 impl<'tcx> HashStable<StableHashingContext<'tcx>> for CodegenUnit<'tcx> {
diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs
index 5e470e8d86e..8c8108b0604 100644
--- a/src/librustc/session/config.rs
+++ b/src/librustc/session/config.rs
@@ -1234,6 +1234,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
         "rewrite operators on i128 and u128 into lang item calls (typically provided \
          by compiler-builtins) so translation doesn't need to support them,
          overriding the default for the current target"),
+    human_readable_cgu_names: bool = (false, parse_bool, [TRACKED],
+        "generate human-readable, predictable names for codegen units"),
 }
 
 pub fn default_lib_output() -> CrateType {
diff --git a/src/librustc_data_structures/base_n.rs b/src/librustc_data_structures/base_n.rs
index cf54229fa7f..d333b6393b9 100644
--- a/src/librustc_data_structures/base_n.rs
+++ b/src/librustc_data_structures/base_n.rs
@@ -13,18 +13,21 @@
 
 use std::str;
 
-pub const MAX_BASE: u64 = 64;
-pub const ALPHANUMERIC_ONLY: u64 = 62;
+pub const MAX_BASE: usize = 64;
+pub const ALPHANUMERIC_ONLY: usize = 62;
+pub const CASE_INSENSITIVE: usize = 36;
 
 const BASE_64: &'static [u8; MAX_BASE as usize] =
     b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
 
 #[inline]
-pub fn push_str(mut n: u64, base: u64, output: &mut String) {
+pub fn push_str(mut n: u128, base: usize, output: &mut String) {
     debug_assert!(base >= 2 && base <= MAX_BASE);
-    let mut s = [0u8; 64];
+    let mut s = [0u8; 128];
     let mut index = 0;
 
+    let base = base as u128;
+
     loop {
         s[index] = BASE_64[(n % base) as usize];
         index += 1;
@@ -39,16 +42,16 @@ pub fn push_str(mut n: u64, base: u64, output: &mut String) {
 }
 
 #[inline]
-pub fn encode(n: u64, base: u64) -> String {
-    let mut s = String::with_capacity(13);
+pub fn encode(n: u128, base: usize) -> String {
+    let mut s = String::new();
     push_str(n, base, &mut s);
     s
 }
 
 #[test]
 fn test_encode() {
-    fn test(n: u64, base: u64) {
-        assert_eq!(Ok(n), u64::from_str_radix(&encode(n, base), base as u32));
+    fn test(n: u128, base: usize) {
+        assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
     }
 
     for base in 2..37 {
@@ -57,7 +60,8 @@ fn test_encode() {
         test(35, base);
         test(36, base);
         test(37, base);
-        test(u64::max_value(), base);
+        test(u64::max_value() as u128, base);
+        test(u128::max_value(), base);
 
         for i in 0 .. 1_000 {
             test(i * 983, base);
diff --git a/src/librustc_incremental/persist/fs.rs b/src/librustc_incremental/persist/fs.rs
index 42b1fcccace..f4171f951f4 100644
--- a/src/librustc_incremental/persist/fs.rs
+++ b/src/librustc_incremental/persist/fs.rs
@@ -137,7 +137,7 @@ const QUERY_CACHE_FILENAME: &'static str = "query-cache.bin";
 // or hexadecimal numbers (we want short file and directory names). Since these
 // numbers will be used in file names, we choose an encoding that is not
 // case-sensitive (as opposed to base64, for example).
-const INT_ENCODE_BASE: u64 = 36;
+const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE;
 
 pub fn dep_graph_path(sess: &Session) -> PathBuf {
     in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME)
@@ -357,7 +357,7 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) {
     let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]);
 
     // Append the svh
-    base_n::push_str(svh.as_u64(), INT_ENCODE_BASE, &mut new_sub_dir_name);
+    base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name);
 
     // Create the full path
     let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
@@ -465,7 +465,7 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
 
     let directory_name = format!("s-{}-{}-working",
                                   timestamp,
-                                  base_n::encode(random_number as u64,
+                                  base_n::encode(random_number as u128,
                                                  INT_ENCODE_BASE));
     debug!("generate_session_dir_path: directory_name = {}", directory_name);
     let directory_path = crate_dir.join(directory_name);
@@ -599,7 +599,7 @@ fn timestamp_to_string(timestamp: SystemTime) -> String {
     let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
     let micros = duration.as_secs() * 1_000_000 +
                 (duration.subsec_nanos() as u64) / 1000;
-    base_n::encode(micros, INT_ENCODE_BASE)
+    base_n::encode(micros as u128, INT_ENCODE_BASE)
 }
 
 fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
@@ -626,7 +626,8 @@ fn crate_path(sess: &Session,
     // The full crate disambiguator is really long. 64 bits of it should be
     // sufficient.
     let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash();
-    let crate_disambiguator = base_n::encode(crate_disambiguator, INT_ENCODE_BASE);
+    let crate_disambiguator = base_n::encode(crate_disambiguator as u128,
+                                             INT_ENCODE_BASE);
 
     let crate_name = format!("{}-{}", crate_name, crate_disambiguator);
     incr_dir.join(crate_name)
diff --git a/src/librustc_incremental/persist/work_product.rs b/src/librustc_incremental/persist/work_product.rs
index f23b8dc85b8..879132bcacf 100644
--- a/src/librustc_incremental/persist/work_product.rs
+++ b/src/librustc_incremental/persist/work_product.rs
@@ -35,9 +35,9 @@ pub fn save_trans_partition(sess: &Session,
                  let extension = match kind {
                      WorkProductFileKind::Object => "o",
                      WorkProductFileKind::Bytecode => "bc",
-                     WorkProductFileKind::BytecodeCompressed => "bc-compressed",
+                     WorkProductFileKind::BytecodeCompressed => "bc.z",
                  };
-                 let file_name = format!("cgu-{}.{}", cgu_name, extension);
+                 let file_name = format!("{}.{}", cgu_name, extension);
                  let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name);
                  match link_or_copy(path, &path_in_incr_dir) {
                      Ok(_) => Some((kind, file_name)),
diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs
index 11c68a11669..e899cc072e0 100644
--- a/src/librustc_mir/monomorphize/partitioning.rs
+++ b/src/librustc_mir/monomorphize/partitioning.rs
@@ -200,7 +200,16 @@ impl<'tcx> CodegenUnitExt<'tcx> for CodegenUnit<'tcx> {
 }
 
 // Anything we can't find a proper codegen unit for goes into this.
-const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit";
+fn fallback_cgu_name(tcx: TyCtxt) -> InternedString {
+    const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit";
+
+    if tcx.sess.opts.debugging_opts.human_readable_cgu_names {
+        Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str()
+    } else {
+        Symbol::intern(&CodegenUnit::mangle_name(FALLBACK_CODEGEN_UNIT)).as_str()
+    }
+}
+
 
 pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
                               trans_items: I,
@@ -297,7 +306,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
 
         let codegen_unit_name = match characteristic_def_id {
             Some(def_id) => compute_codegen_unit_name(tcx, def_id, is_volatile),
-            None => Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(),
+            None => fallback_cgu_name(tcx),
         };
 
         let make_codegen_unit = || {
@@ -381,7 +390,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
     // always ensure we have at least one CGU; otherwise, if we have a
     // crate with just types (for example), we could wind up with no CGU
     if codegen_units.is_empty() {
-        let codegen_unit_name = Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str();
+        let codegen_unit_name = fallback_cgu_name(tcx);
         codegen_units.insert(codegen_unit_name.clone(),
                              CodegenUnit::new(codegen_unit_name.clone()));
     }
@@ -630,10 +639,10 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
     // Unfortunately we cannot just use the `ty::item_path` infrastructure here
     // because we need paths to modules and the DefIds of those are not
     // available anymore for external items.
-    let mut mod_path = String::with_capacity(64);
+    let mut cgu_name = String::with_capacity(64);
 
     let def_path = tcx.def_path(def_id);
-    mod_path.push_str(&tcx.crate_name(def_path.krate).as_str());
+    cgu_name.push_str(&tcx.crate_name(def_path.krate).as_str());
 
     for part in tcx.def_path(def_id)
                    .data
@@ -644,15 +653,21 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
                             _ => false,
                         }
                     }) {
-        mod_path.push_str("-");
-        mod_path.push_str(&part.data.as_interned_str());
+        cgu_name.push_str("-");
+        cgu_name.push_str(&part.data.as_interned_str());
     }
 
     if volatile {
-        mod_path.push_str(".volatile");
+        cgu_name.push_str(".volatile");
     }
 
-    return Symbol::intern(&mod_path[..]).as_str();
+    let cgu_name = if tcx.sess.opts.debugging_opts.human_readable_cgu_names {
+        cgu_name
+    } else {
+        CodegenUnit::mangle_name(&cgu_name)
+    };
+
+    Symbol::intern(&cgu_name[..]).as_str()
 }
 
 fn numbered_codegen_unit_name(crate_name: &str, index: usize) -> InternedString {
diff --git a/src/librustc_trans/assert_module_sources.rs b/src/librustc_trans/assert_module_sources.rs
index c891bd8aaf4..0e8af1b9511 100644
--- a/src/librustc_trans/assert_module_sources.rs
+++ b/src/librustc_trans/assert_module_sources.rs
@@ -28,8 +28,10 @@
 //! perturb the reuse results.
 
 use rustc::dep_graph::{DepNode, DepConstructor};
+use rustc::mir::mono::CodegenUnit;
 use rustc::ty::TyCtxt;
 use syntax::ast;
+use syntax_pos::symbol::Symbol;
 use rustc::ich::{ATTR_PARTITION_REUSED, ATTR_PARTITION_TRANSLATED};
 
 const MODULE: &'static str = "module";
@@ -71,9 +73,11 @@ impl<'a, 'tcx> AssertModuleSource<'a, 'tcx> {
         }
 
         let mname = self.field(attr, MODULE);
+        let mangled_cgu_name = CodegenUnit::mangle_name(&mname.as_str());
+        let mangled_cgu_name = Symbol::intern(&mangled_cgu_name).as_str();
 
         let dep_node = DepNode::new(self.tcx,
-                                    DepConstructor::CompileCodegenUnit(mname.as_str()));
+                                    DepConstructor::CompileCodegenUnit(mangled_cgu_name));
 
         if let Some(loaded_from_cache) = self.tcx.dep_graph.was_loaded_from_cache(&dep_node) {
             match (disposition, loaded_from_cache) {
diff --git a/src/librustc_trans/back/bytecode.rs b/src/librustc_trans/back/bytecode.rs
index 9e4630c08f9..212d1aaf055 100644
--- a/src/librustc_trans/back/bytecode.rs
+++ b/src/librustc_trans/back/bytecode.rs
@@ -47,7 +47,7 @@ pub const RLIB_BYTECODE_OBJECT_MAGIC: &'static [u8] = b"RUST_OBJECT";
 // The version number this compiler will write to bytecode objects in rlibs
 pub const RLIB_BYTECODE_OBJECT_VERSION: u8 = 2;
 
-pub const RLIB_BYTECODE_EXTENSION: &str = "bytecode.encoded";
+pub const RLIB_BYTECODE_EXTENSION: &str = "bc.z";
 
 pub fn encode(identifier: &str, bytecode: &[u8]) -> Vec<u8> {
     let mut encoded = Vec::new();
diff --git a/src/librustc_trans/context.rs b/src/librustc_trans/context.rs
index 248b37c43b4..3014963a97f 100644
--- a/src/librustc_trans/context.rs
+++ b/src/librustc_trans/context.rs
@@ -572,7 +572,7 @@ impl<'b, 'tcx> CrateContext<'b, 'tcx> {
         let mut name = String::with_capacity(prefix.len() + 6);
         name.push_str(prefix);
         name.push_str(".");
-        base_n::push_str(idx as u64, base_n::ALPHANUMERIC_ONLY, &mut name);
+        base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
         name
     }
 
diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs
index dbeee39e606..774733e7068 100644
--- a/src/tools/compiletest/src/runtest.rs
+++ b/src/tools/compiletest/src/runtest.rs
@@ -1520,6 +1520,10 @@ impl<'test> TestCx<'test> {
             rustc.args(&["-Z", "incremental-queries"]);
         }
 
+        if self.config.mode == CodegenUnits {
+            rustc.args(&["-Z", "human_readable_cgu_names"]);
+        }
+
         match self.config.mode {
             CompileFail | ParseFail | Incremental => {
                 // If we are extracting and matching errors in the new
author	bors <bors@rust-lang.org>	2018-01-09 16:04:21 +0000
committer	bors <bors@rust-lang.org>	2018-01-09 16:04:21 +0000
commit	61452e506f0c88861cccaeea4ced3419bdb3cbe0 (patch)
tree	f6630e34f7e3b44761f02356fe13192b78d68a2f
parent	2e33c89ff1518359c4bd5fbed1571ea00cb3b146 (diff)
parent	94f3037f4bc2f18ac1c427e145928cc766b573fe (diff)
download	rust-61452e506f0c88861cccaeea4ced3419bdb3cbe0.tar.gz rust-61452e506f0c88861cccaeea4ced3419bdb3cbe0.zip