about summary refs log tree commit diff
diff options
context:
space:
mode:
author	bors <bors@rust-lang.org>	2020-10-28 14:52:20 +0000
committer	bors <bors@rust-lang.org>	2020-10-28 14:52:20 +0000
commit	717eb6ccea0f9e0079eab82c047bbc1ebde725c2 (patch)
tree	2d8519cfa14bffce13c5d7dc95c81b0486862083
parent	2eb4fc800aaf5006f89af3af591e2aa34f469d81 (diff)
parent	c2f4bbd17614bf54353ccd913463744d6f7c9242 (diff)
download	rust-717eb6ccea0f9e0079eab82c047bbc1ebde725c2.tar.gz
	rust-717eb6ccea0f9e0079eab82c047bbc1ebde725c2.zip
Auto merge of #78409 - pietroalbini:build-manifest-checksum-cache, r=Mark-Simulacrum
Add checksums cache to build-manifest

During the release process we're currently calculating the SHA256 of each file three times:

1. In `build-manifest`, to fill the `hash = "f00"` keys of the manifests.
2. In `promote-release`, to generate the `.sha256` files.
3. In `promote-release`, to generate the `.asc` GPG signatures.

Calculations 1. and 2. could be merged into a single one if there was a way for `build-manifest` to pass the checksums it generated over to `promote-release`. Unfortunately calculation 3. can't be merged as GPG requires extra metadata to be hashed.

This PR adds support for merging 1. and 2. by creating the `BUILD_MANIFEST_CHECKSUM_CACHE` environment variable, which points to a JSON file storing a cache of all the calculated checksums. `build-manifest` will load it at startup and avoid generating existing checksums, and it will dump its internal checksums cache into it when it exits successfully.

This PR also makes it possible to run `build-manifest` multiple times without the need to wait for checksums to be calculated in the following invocations. The speedup will make it possible to work towards a fix for https://github.com/rust-lang/promote-release/issues/15 without impacting the release process duration nor our storage costs.

This PR can be reviewed commit-by-commit.
r? `@Mark-Simulacrum`
-rw-r--r--src/tools/build-manifest/src/checksum.rs97
-rw-r--r--src/tools/build-manifest/src/main.rs59
2 files changed, 106 insertions, 50 deletions
diff --git a/src/tools/build-manifest/src/checksum.rs b/src/tools/build-manifest/src/checksum.rs
new file mode 100644
index 00000000000..c019c7a2f7a
--- /dev/null
+++ b/src/tools/build-manifest/src/checksum.rs
@@ -0,0 +1,97 @@
+use crate::manifest::{FileHash, Manifest};
+use rayon::prelude::*;
+use sha2::{Digest, Sha256};
+use std::collections::{HashMap, HashSet};
+use std::error::Error;
+use std::fs::File;
+use std::io::BufReader;
+use std::path::{Path, PathBuf};
+use std::sync::Mutex;
+use std::time::Instant;
+
+pub(crate) struct Checksums {
+    cache_path: Option<PathBuf>,
+    collected: Mutex<HashMap<PathBuf, String>>,
+}
+
+impl Checksums {
+    pub(crate) fn new() -> Result<Self, Box<dyn Error>> {
+        let cache_path = std::env::var_os("BUILD_MANIFEST_CHECKSUM_CACHE").map(PathBuf::from);
+
+        let mut collected = HashMap::new();
+        if let Some(path) = &cache_path {
+            if path.is_file() {
+                collected = serde_json::from_slice(&std::fs::read(path)?)?;
+            }
+        }
+
+        Ok(Checksums { cache_path, collected: Mutex::new(collected) })
+    }
+
+    pub(crate) fn store_cache(&self) -> Result<(), Box<dyn Error>> {
+        if let Some(path) = &self.cache_path {
+            std::fs::write(path, &serde_json::to_vec(&self.collected)?)?;
+        }
+        Ok(())
+    }
+
+    pub(crate) fn fill_missing_checksums(&mut self, manifest: &mut Manifest) {
+        let need_checksums = self.find_missing_checksums(manifest);
+        if !need_checksums.is_empty() {
+            self.collect_checksums(&need_checksums);
+        }
+        self.replace_checksums(manifest);
+    }
+
+    fn find_missing_checksums(&mut self, manifest: &mut Manifest) -> HashSet<PathBuf> {
+        let collected = self.collected.lock().unwrap();
+        let mut need_checksums = HashSet::new();
+        crate::manifest::visit_file_hashes(manifest, |file_hash| {
+            if let FileHash::Missing(path) = file_hash {
+                let path = std::fs::canonicalize(path).unwrap();
+                if !collected.contains_key(&path) {
+                    need_checksums.insert(path);
+                }
+            }
+        });
+        need_checksums
+    }
+
+    fn replace_checksums(&mut self, manifest: &mut Manifest) {
+        let collected = self.collected.lock().unwrap();
+        crate::manifest::visit_file_hashes(manifest, |file_hash| {
+            if let FileHash::Missing(path) = file_hash {
+                let path = std::fs::canonicalize(path).unwrap();
+                match collected.get(&path) {
+                    Some(hash) => *file_hash = FileHash::Present(hash.clone()),
+                    None => panic!("missing hash for file {}", path.display()),
+                }
+            }
+        });
+    }
+
+    fn collect_checksums(&mut self, files: &HashSet<PathBuf>) {
+        let collection_start = Instant::now();
+        println!(
+            "collecting hashes for {} tarballs across {} threads",
+            files.len(),
+            rayon::current_num_threads().min(files.len()),
+        );
+
+        files.par_iter().for_each(|path| match hash(path) {
+            Ok(hash) => {
+                self.collected.lock().unwrap().insert(path.clone(), hash);
+            }
+            Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
+        });
+
+        println!("collected {} hashes in {:.2?}", files.len(), collection_start.elapsed());
+    }
+}
+
+fn hash(path: &Path) -> Result<String, Box<dyn Error>> {
+    let mut file = BufReader::new(File::open(path)?);
+    let mut sha256 = Sha256::default();
+    std::io::copy(&mut file, &mut sha256)?;
+    Ok(hex::encode(sha256.finalize()))
+}
diff --git a/src/tools/build-manifest/src/main.rs b/src/tools/build-manifest/src/main.rs
index ffcf10571ca..2863216855b 100644
--- a/src/tools/build-manifest/src/main.rs
+++ b/src/tools/build-manifest/src/main.rs
@@ -4,22 +4,19 @@
 //! via `x.py dist hash-and-sign`; the cmdline arguments are set up
 //! by rustbuild (in `src/bootstrap/dist.rs`).
 
+mod checksum;
 mod manifest;
 mod versions;
 
-use crate::manifest::{Component, FileHash, Manifest, Package, Rename, Target};
+use crate::checksum::Checksums;
+use crate::manifest::{Component, Manifest, Package, Rename, Target};
 use crate::versions::{PkgType, Versions};
-use rayon::prelude::*;
-use sha2::Digest;
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::env;
-use std::error::Error;
 use std::fs::{self, File};
-use std::io::{self, BufReader, Read, Write};
+use std::io::{self, Read, Write};
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
-use std::sync::Mutex;
-use std::time::Instant;
 
 static HOSTS: &[&str] = &[
     "aarch64-apple-darwin",
@@ -186,6 +183,7 @@ macro_rules! t {
 
 struct Builder {
     versions: Versions,
+    checksums: Checksums,
     shipped_files: HashSet<String>,
 
     input: PathBuf,
@@ -240,6 +238,7 @@ fn main() {
 
     Builder {
         versions: Versions::new(&channel, &input).unwrap(),
+        checksums: t!(Checksums::new()),
         shipped_files: HashSet::new(),
 
         input,
@@ -276,6 +275,8 @@ impl Builder {
         if let Some(path) = std::env::var_os("BUILD_MANIFEST_SHIPPED_FILES_PATH") {
             self.write_shipped_files(&Path::new(&path));
         }
+
+        t!(self.checksums.store_cache());
     }
 
     /// If a tool does not pass its tests, don't ship it.
@@ -321,7 +322,7 @@ impl Builder {
         self.add_renames_to(&mut manifest);
         manifest.pkg.insert("rust".to_string(), self.rust_package(&manifest));
 
-        self.fill_missing_hashes(&mut manifest);
+        self.checksums.fill_missing_checksums(&mut manifest);
 
         manifest
     }
@@ -595,41 +596,6 @@ impl Builder {
         assert!(t!(child.wait()).success());
     }
 
-    fn fill_missing_hashes(&self, manifest: &mut Manifest) {
-        // First collect all files that need hashes
-        let mut need_hashes = HashSet::new();
-        crate::manifest::visit_file_hashes(manifest, |file_hash| {
-            if let FileHash::Missing(path) = file_hash {
-                need_hashes.insert(path.clone());
-            }
-        });
-
-        let collected = Mutex::new(HashMap::new());
-        let collection_start = Instant::now();
-        println!(
-            "collecting hashes for {} tarballs across {} threads",
-            need_hashes.len(),
-            rayon::current_num_threads().min(need_hashes.len()),
-        );
-        need_hashes.par_iter().for_each(|path| match fetch_hash(path) {
-            Ok(hash) => {
-                collected.lock().unwrap().insert(path, hash);
-            }
-            Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
-        });
-        let collected = collected.into_inner().unwrap();
-        println!("collected {} hashes in {:.2?}", collected.len(), collection_start.elapsed());
-
-        crate::manifest::visit_file_hashes(manifest, |file_hash| {
-            if let FileHash::Missing(path) = file_hash {
-                match collected.get(path) {
-                    Some(hash) => *file_hash = FileHash::Present(hash.clone()),
-                    None => panic!("missing hash for file {}", path.display()),
-                }
-            }
-        })
-    }
-
     fn write_channel_files(&mut self, channel_name: &str, manifest: &Manifest) {
         self.write(&toml::to_string(&manifest).unwrap(), channel_name, ".toml");
         self.write(&manifest.date, channel_name, "-date.txt");
@@ -660,10 +626,3 @@ impl Builder {
         t!(std::fs::write(path, content.as_bytes()));
     }
 }
-
-fn fetch_hash(path: &Path) -> Result<String, Box<dyn Error>> {
-    let mut file = BufReader::new(File::open(path)?);
-    let mut sha256 = sha2::Sha256::default();
-    std::io::copy(&mut file, &mut sha256)?;
-    Ok(hex::encode(sha256.finalize()))
-}