diff options
| author | bors <bors@rust-lang.org> | 2020-10-28 14:52:20 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2020-10-28 14:52:20 +0000 |
| commit | 717eb6ccea0f9e0079eab82c047bbc1ebde725c2 (patch) | |
| tree | 2d8519cfa14bffce13c5d7dc95c81b0486862083 | |
| parent | 2eb4fc800aaf5006f89af3af591e2aa34f469d81 (diff) | |
| parent | c2f4bbd17614bf54353ccd913463744d6f7c9242 (diff) | |
| download | rust-717eb6ccea0f9e0079eab82c047bbc1ebde725c2.tar.gz rust-717eb6ccea0f9e0079eab82c047bbc1ebde725c2.zip | |
Auto merge of #78409 - pietroalbini:build-manifest-checksum-cache, r=Mark-Simulacrum
Add checksums cache to build-manifest During the release process we're currently calculating the SHA256 of each file three times: 1. In `build-manifest`, to fill the `hash = "f00"` keys of the manifests. 2. In `promote-release`, to generate the `.sha256` files. 3. In `promote-release`, to generate the `.asc` GPG signatures. Calculations 1. and 2. could be merged into a single one if there was a way for `build-manifest` to pass the checksums it generated over to `promote-release`. Unfortunately calculation 3. can't be merged as GPG requires extra metadata to be hashed. This PR adds support for merging 1. and 2. by creating the `BUILD_MANIFEST_CHECKSUM_CACHE` environment variable, which points to a JSON file storing a cache of all the calculated checksums. `build-manifest` will load it at startup and avoid generating existing checksums, and it will dump its internal checksums cache into it when it exits successfully. This PR also allows running `build-manifest` multiple times without the need to wait for checksums to be calculated in the following invocations. The speedup will allow working towards a fix for https://github.com/rust-lang/promote-release/issues/15 without impacting the release process duration or our storage costs. This PR can be reviewed commit-by-commit. r? `@Mark-Simulacrum`
| -rw-r--r-- | src/tools/build-manifest/src/checksum.rs | 97 | ||||
| -rw-r--r-- | src/tools/build-manifest/src/main.rs | 59 |
2 files changed, 106 insertions, 50 deletions
diff --git a/src/tools/build-manifest/src/checksum.rs b/src/tools/build-manifest/src/checksum.rs new file mode 100644 index 00000000000..c019c7a2f7a --- /dev/null +++ b/src/tools/build-manifest/src/checksum.rs @@ -0,0 +1,97 @@ +use crate::manifest::{FileHash, Manifest}; +use rayon::prelude::*; +use sha2::{Digest, Sha256}; +use std::collections::{HashMap, HashSet}; +use std::error::Error; +use std::fs::File; +use std::io::BufReader; +use std::path::{Path, PathBuf}; +use std::sync::Mutex; +use std::time::Instant; + +pub(crate) struct Checksums { + cache_path: Option<PathBuf>, + collected: Mutex<HashMap<PathBuf, String>>, +} + +impl Checksums { + pub(crate) fn new() -> Result<Self, Box<dyn Error>> { + let cache_path = std::env::var_os("BUILD_MANIFEST_CHECKSUM_CACHE").map(PathBuf::from); + + let mut collected = HashMap::new(); + if let Some(path) = &cache_path { + if path.is_file() { + collected = serde_json::from_slice(&std::fs::read(path)?)?; + } + } + + Ok(Checksums { cache_path, collected: Mutex::new(collected) }) + } + + pub(crate) fn store_cache(&self) -> Result<(), Box<dyn Error>> { + if let Some(path) = &self.cache_path { + std::fs::write(path, &serde_json::to_vec(&self.collected)?)?; + } + Ok(()) + } + + pub(crate) fn fill_missing_checksums(&mut self, manifest: &mut Manifest) { + let need_checksums = self.find_missing_checksums(manifest); + if !need_checksums.is_empty() { + self.collect_checksums(&need_checksums); + } + self.replace_checksums(manifest); + } + + fn find_missing_checksums(&mut self, manifest: &mut Manifest) -> HashSet<PathBuf> { + let collected = self.collected.lock().unwrap(); + let mut need_checksums = HashSet::new(); + crate::manifest::visit_file_hashes(manifest, |file_hash| { + if let FileHash::Missing(path) = file_hash { + let path = std::fs::canonicalize(path).unwrap(); + if !collected.contains_key(&path) { + need_checksums.insert(path); + } + } + }); + need_checksums + } + + fn replace_checksums(&mut self, manifest: &mut Manifest) { + let 
collected = self.collected.lock().unwrap(); + crate::manifest::visit_file_hashes(manifest, |file_hash| { + if let FileHash::Missing(path) = file_hash { + let path = std::fs::canonicalize(path).unwrap(); + match collected.get(&path) { + Some(hash) => *file_hash = FileHash::Present(hash.clone()), + None => panic!("missing hash for file {}", path.display()), + } + } + }); + } + + fn collect_checksums(&mut self, files: &HashSet<PathBuf>) { + let collection_start = Instant::now(); + println!( + "collecting hashes for {} tarballs across {} threads", + files.len(), + rayon::current_num_threads().min(files.len()), + ); + + files.par_iter().for_each(|path| match hash(path) { + Ok(hash) => { + self.collected.lock().unwrap().insert(path.clone(), hash); + } + Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err), + }); + + println!("collected {} hashes in {:.2?}", files.len(), collection_start.elapsed()); + } +} + +fn hash(path: &Path) -> Result<String, Box<dyn Error>> { + let mut file = BufReader::new(File::open(path)?); + let mut sha256 = Sha256::default(); + std::io::copy(&mut file, &mut sha256)?; + Ok(hex::encode(sha256.finalize())) +} diff --git a/src/tools/build-manifest/src/main.rs b/src/tools/build-manifest/src/main.rs index ffcf10571ca..2863216855b 100644 --- a/src/tools/build-manifest/src/main.rs +++ b/src/tools/build-manifest/src/main.rs @@ -4,22 +4,19 @@ //! via `x.py dist hash-and-sign`; the cmdline arguments are set up //! by rustbuild (in `src/bootstrap/dist.rs`). 
+mod checksum; mod manifest; mod versions; -use crate::manifest::{Component, FileHash, Manifest, Package, Rename, Target}; +use crate::checksum::Checksums; +use crate::manifest::{Component, Manifest, Package, Rename, Target}; use crate::versions::{PkgType, Versions}; -use rayon::prelude::*; -use sha2::Digest; use std::collections::{BTreeMap, HashMap, HashSet}; use std::env; -use std::error::Error; use std::fs::{self, File}; -use std::io::{self, BufReader, Read, Write}; +use std::io::{self, Read, Write}; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; -use std::sync::Mutex; -use std::time::Instant; static HOSTS: &[&str] = &[ "aarch64-apple-darwin", @@ -186,6 +183,7 @@ macro_rules! t { struct Builder { versions: Versions, + checksums: Checksums, shipped_files: HashSet<String>, input: PathBuf, @@ -240,6 +238,7 @@ fn main() { Builder { versions: Versions::new(&channel, &input).unwrap(), + checksums: t!(Checksums::new()), shipped_files: HashSet::new(), input, @@ -276,6 +275,8 @@ impl Builder { if let Some(path) = std::env::var_os("BUILD_MANIFEST_SHIPPED_FILES_PATH") { self.write_shipped_files(&Path::new(&path)); } + + t!(self.checksums.store_cache()); } /// If a tool does not pass its tests, don't ship it. 
@@ -321,7 +322,7 @@ impl Builder { self.add_renames_to(&mut manifest); manifest.pkg.insert("rust".to_string(), self.rust_package(&manifest)); - self.fill_missing_hashes(&mut manifest); + self.checksums.fill_missing_checksums(&mut manifest); manifest } @@ -595,41 +596,6 @@ impl Builder { assert!(t!(child.wait()).success()); } - fn fill_missing_hashes(&self, manifest: &mut Manifest) { - // First collect all files that need hashes - let mut need_hashes = HashSet::new(); - crate::manifest::visit_file_hashes(manifest, |file_hash| { - if let FileHash::Missing(path) = file_hash { - need_hashes.insert(path.clone()); - } - }); - - let collected = Mutex::new(HashMap::new()); - let collection_start = Instant::now(); - println!( - "collecting hashes for {} tarballs across {} threads", - need_hashes.len(), - rayon::current_num_threads().min(need_hashes.len()), - ); - need_hashes.par_iter().for_each(|path| match fetch_hash(path) { - Ok(hash) => { - collected.lock().unwrap().insert(path, hash); - } - Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err), - }); - let collected = collected.into_inner().unwrap(); - println!("collected {} hashes in {:.2?}", collected.len(), collection_start.elapsed()); - - crate::manifest::visit_file_hashes(manifest, |file_hash| { - if let FileHash::Missing(path) = file_hash { - match collected.get(path) { - Some(hash) => *file_hash = FileHash::Present(hash.clone()), - None => panic!("missing hash for file {}", path.display()), - } - } - }) - } - fn write_channel_files(&mut self, channel_name: &str, manifest: &Manifest) { self.write(&toml::to_string(&manifest).unwrap(), channel_name, ".toml"); self.write(&manifest.date, channel_name, "-date.txt"); @@ -660,10 +626,3 @@ impl Builder { t!(std::fs::write(path, content.as_bytes())); } } - -fn fetch_hash(path: &Path) -> Result<String, Box<dyn Error>> { - let mut file = BufReader::new(File::open(path)?); - let mut sha256 = sha2::Sha256::default(); - std::io::copy(&mut file, 
&mut sha256)?; - Ok(hex::encode(sha256.finalize())) -} |
