about summary refs log tree commit diff
diff options
context:
space:
mode:
author Pietro Albini <pietro@pietroalbini.org> 2020-10-01 17:15:12 +0200
committer Pietro Albini <pietro@pietroalbini.org> 2020-10-01 17:30:24 +0200
commit acd8e59b6650deab3979e82b49c8c26e0f4ceefa (patch)
tree 3e04475b5e1a4603b588aadb97903998426e0c45
parent 0375ee8b55becd14ca73ad420b7311fb6e9dfaeb (diff)
download rust-acd8e59b6650deab3979e82b49c8c26e0f4ceefa.tar.gz
rust-acd8e59b6650deab3979e82b49c8c26e0f4ceefa.zip
build-manifest: calculate checksums lazily and in parallel

This commit improves the way build-manifest calculates the checksums
included in the manifest, speeding it up:

* Instead of calculating all the hashes beforehand and then using the
  ones we need, the manifest is first generated with placeholder hashes,
  and then a function walks through the manifest and calculates only the
  needed checksums.

* Calculating the checksums is now done in parallel with rayon, to
  better utilize all the available disk bandwidth.

* Calculating the checksums now uses the sha2 crate instead of the
  sha256sum CLI tool: this avoids the overhead of calling another
  process, but more importantly uses hardware acceleration whenever
  available (the CLI tool doesn't support it at all).
-rw-r--r-- Cargo.lock 72
-rw-r--r-- src/tools/build-manifest/Cargo.toml 3
-rw-r--r-- src/tools/build-manifest/src/main.rs 71
-rw-r--r-- src/tools/build-manifest/src/manifest.rs 47
4 files changed, 164 insertions, 29 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 28bd57ef673..649972e61d7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -183,7 +183,16 @@ dependencies = [
  "block-padding",
  "byte-tools",
  "byteorder",
- "generic-array",
+ "generic-array 0.12.3",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
+dependencies = [
+ "generic-array 0.14.4",
 ]
 
 [[package]]
@@ -233,8 +242,11 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "flate2",
+ "hex 0.4.2",
+ "rayon",
  "serde",
  "serde_json",
+ "sha2",
  "tar",
  "toml",
 ]
@@ -688,6 +700,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9a21fa21941700a3cd8fcb4091f361a6a712fac632f85d9f487cc892045d55c6"
 
 [[package]]
+name = "cpuid-bool"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634"
+
+[[package]]
 name = "crates-io"
 version = "0.31.1"
 dependencies = [
@@ -884,7 +902,16 @@ version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
 dependencies = [
- "generic-array",
+ "generic-array 0.12.3",
+]
+
+[[package]]
+name = "digest"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
+dependencies = [
+ "generic-array 0.14.4",
 ]
 
 [[package]]
@@ -1167,6 +1194,16 @@ dependencies = [
 ]
 
 [[package]]
+name = "generic-array"
+version = "0.14.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
+[[package]]
 name = "getopts"
 version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1844,9 +1881,9 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a18af3dcaf2b0219366cdb4e2af65a6101457b415c3d1a5c71dd9c2b7c77b9c8"
 dependencies = [
- "block-buffer",
- "digest",
- "opaque-debug",
+ "block-buffer 0.7.3",
+ "digest 0.8.1",
+ "opaque-debug 0.2.3",
 ]
 
 [[package]]
@@ -2107,6 +2144,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
 
 [[package]]
+name = "opaque-debug"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
+
+[[package]]
 name = "open"
 version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -4371,10 +4414,23 @@ version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df"
 dependencies = [
- "block-buffer",
- "digest",
+ "block-buffer 0.7.3",
+ "digest 0.8.1",
  "fake-simd",
- "opaque-debug",
+ "opaque-debug 0.2.3",
+]
+
+[[package]]
+name = "sha2"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1"
+dependencies = [
+ "block-buffer 0.9.0",
+ "cfg-if",
+ "cpuid-bool",
+ "digest 0.9.0",
+ "opaque-debug 0.3.0",
 ]
 
 [[package]]
diff --git a/src/tools/build-manifest/Cargo.toml b/src/tools/build-manifest/Cargo.toml
index 4f89c31936d..4ae4dbfc06e 100644
--- a/src/tools/build-manifest/Cargo.toml
+++ b/src/tools/build-manifest/Cargo.toml
@@ -11,3 +11,6 @@ serde_json = "1.0"
 anyhow = "1.0.32"
 flate2 = "1.0.16"
 tar = "0.4.29"
+sha2 = "0.9.1"
+rayon = "1.3.1"
+hex = "0.4.2"
diff --git a/src/tools/build-manifest/src/main.rs b/src/tools/build-manifest/src/main.rs
index 5f01b46d70d..91f042a07d4 100644
--- a/src/tools/build-manifest/src/main.rs
+++ b/src/tools/build-manifest/src/main.rs
@@ -7,14 +7,19 @@
 mod manifest;
 mod versions;
 
-use crate::manifest::{Component, Manifest, Package, Rename, Target};
+use crate::manifest::{Component, FileHash, Manifest, Package, Rename, Target};
 use crate::versions::{PkgType, Versions};
-use std::collections::{BTreeMap, HashMap};
+use rayon::prelude::*;
+use sha2::Digest;
+use std::collections::{BTreeMap, HashMap, HashSet};
 use std::env;
+use std::error::Error;
 use std::fs::{self, File};
 use std::io::{self, Read, Write};
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
+use std::sync::Mutex;
+use std::time::Instant;
 
 static HOSTS: &[&str] = &[
     "aarch64-unknown-linux-gnu",
@@ -181,7 +186,6 @@ struct Builder {
 
     input: PathBuf,
     output: PathBuf,
-    digests: BTreeMap<String, String>,
     s3_address: String,
     date: String,
 
@@ -223,7 +227,6 @@ fn main() {
 
         input,
         output,
-        digests: BTreeMap::new(),
         s3_address,
         date,
 
@@ -236,7 +239,9 @@ fn main() {
 impl Builder {
     fn build(&mut self) {
         self.check_toolstate();
-        self.digest_and_sign();
+        if self.legacy {
+            self.digest_and_sign();
+        }
         let manifest = self.build_manifest();
 
         let rust_version = self.versions.package_version(&PkgType::Rust).unwrap();
@@ -270,10 +275,9 @@ impl Builder {
     /// Hash all files, compute their signatures, and collect the hashes in `self.digests`.
     fn digest_and_sign(&mut self) {
         for file in t!(self.input.read_dir()).map(|e| t!(e).path()) {
-            let filename = file.file_name().unwrap().to_str().unwrap();
-            let digest = self.hash(&file);
+            file.file_name().unwrap().to_str().unwrap();
+            self.hash(&file);
             self.sign(&file);
-            assert!(self.digests.insert(filename.to_string(), digest).is_none());
         }
     }
 
@@ -289,6 +293,9 @@ impl Builder {
         self.add_profiles_to(&mut manifest);
         self.add_renames_to(&mut manifest);
         manifest.pkg.insert("rust".to_string(), self.rust_package(&manifest));
+
+        self.fill_missing_hashes(&mut manifest);
+
         manifest
     }
 
@@ -561,6 +568,41 @@ impl Builder {
         assert!(t!(child.wait()).success());
     }
 
+    fn fill_missing_hashes(&self, manifest: &mut Manifest) {
+        // First collect all files that need hashes
+        let mut need_hashes = HashSet::new();
+        crate::manifest::visit_file_hashes(manifest, |file_hash| {
+            if let FileHash::Missing(path) = file_hash {
+                need_hashes.insert(path.clone());
+            }
+        });
+
+        let collected = Mutex::new(HashMap::new());
+        let collection_start = Instant::now();
+        println!(
+            "collecting hashes for {} tarballs across {} threads",
+            need_hashes.len(),
+            rayon::current_num_threads().min(need_hashes.len()),
+        );
+        need_hashes.par_iter().for_each(|path| match fetch_hash(path) {
+            Ok(hash) => {
+                collected.lock().unwrap().insert(path, hash);
+            }
+            Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
+        });
+        let collected = collected.into_inner().unwrap();
+        println!("collected {} hashes in {:.2?}", collected.len(), collection_start.elapsed());
+
+        crate::manifest::visit_file_hashes(manifest, |file_hash| {
+            if let FileHash::Missing(path) = file_hash {
+                match collected.get(path) {
+                    Some(hash) => *file_hash = FileHash::Present(hash.clone()),
+                    None => panic!("missing hash for file {}", path.display()),
+                }
+            }
+        })
+    }
+
     fn write_channel_files(&self, channel_name: &str, manifest: &Manifest) {
         self.write(&toml::to_string(&manifest).unwrap(), channel_name, ".toml");
         self.write(&manifest.date, channel_name, "-date.txt");
@@ -574,7 +616,16 @@ impl Builder {
     fn write(&self, contents: &str, channel_name: &str, suffix: &str) {
         let dst = self.output.join(format!("channel-rust-{}{}", channel_name, suffix));
         t!(fs::write(&dst, contents));
-        self.hash(&dst);
-        self.sign(&dst);
+        if self.legacy {
+            self.hash(&dst);
+            self.sign(&dst);
+        }
     }
 }
+
+fn fetch_hash(path: &Path) -> Result<String, Box<dyn Error>> {
+    let mut file = File::open(path)?;
+    let mut sha256 = sha2::Sha256::default();
+    std::io::copy(&mut file, &mut sha256)?;
+    Ok(hex::encode(sha256.finalize()))
+}
diff --git a/src/tools/build-manifest/src/manifest.rs b/src/tools/build-manifest/src/manifest.rs
index 2a5755c1bf1..20e62abb54c 100644
--- a/src/tools/build-manifest/src/manifest.rs
+++ b/src/tools/build-manifest/src/manifest.rs
@@ -1,5 +1,5 @@
 use crate::Builder;
-use serde::Serialize;
+use serde::{Serialize, Serializer};
 use std::collections::BTreeMap;
 use std::path::{Path, PathBuf};
 
@@ -29,9 +29,9 @@ pub(crate) struct Rename {
 pub(crate) struct Target {
     pub(crate) available: bool,
     pub(crate) url: Option<String>,
-    pub(crate) hash: Option<String>,
+    pub(crate) hash: Option<FileHash>,
     pub(crate) xz_url: Option<String>,
-    pub(crate) xz_hash: Option<String>,
+    pub(crate) xz_hash: Option<FileHash>,
     pub(crate) components: Option<Vec<Component>>,
     pub(crate) extensions: Option<Vec<Component>>,
 }
@@ -52,10 +52,10 @@ impl Target {
             extensions: None,
             // .gz
             url: gz.as_ref().map(|path| builder.url(path)),
-            hash: gz.map(|path| Self::digest_of(builder, &path)),
+            hash: gz.map(FileHash::Missing),
             // .xz
             xz_url: xz.as_ref().map(|path| builder.url(path)),
-            xz_hash: xz.map(|path| Self::digest_of(builder, &path)),
+            xz_hash: xz.map(FileHash::Missing),
         }
     }
 
@@ -65,12 +65,6 @@ impl Target {
         if path.is_file() { Some(path) } else { None }
     }
 
-    fn digest_of(builder: &Builder, path: &Path) -> String {
-        // TEMPORARY CODE -- DON'T REVIEW :)
-        let file_name = path.file_name().unwrap().to_str().unwrap();
-        builder.digests.get(file_name).unwrap().clone()
-    }
-
     pub(crate) fn unavailable() -> Self {
         Self::default()
     }
@@ -87,3 +81,34 @@ impl Component {
         Self { pkg: pkg.to_string(), target: target.to_string() }
     }
 }
+
+#[allow(unused)]
+pub(crate) enum FileHash {
+    Missing(PathBuf),
+    Present(String),
+}
+
+impl Serialize for FileHash {
+    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        match self {
+            FileHash::Missing(path) => Err(serde::ser::Error::custom(format!(
+                "can't serialize a missing hash for file {}",
+                path.display()
+            ))),
+            FileHash::Present(inner) => inner.serialize(serializer),
+        }
+    }
+}
+
+pub(crate) fn visit_file_hashes(manifest: &mut Manifest, mut f: impl FnMut(&mut FileHash)) {
+    for pkg in manifest.pkg.values_mut() {
+        for target in pkg.target.values_mut() {
+            if let Some(hash) = &mut target.hash {
+                f(hash);
+            }
+            if let Some(hash) = &mut target.xz_hash {
+                f(hash);
+            }
+        }
+    }
+}