about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2024-03-31 12:36:23 +0000
committerbors <bors@rust-lang.org>2024-03-31 12:36:23 +0000
commita8cfc83801301c2b4f0fd030192e268eeb15d473 (patch)
tree0b4194fc5c0470b9f18d3720fbd8a209695afdae /src
parent395f780cd8da001b35d7e588177bbad82d5cf8fa (diff)
parent877e8d456db497c2b7b895cb88243fb894584701 (diff)
downloadrust-a8cfc83801301c2b4f0fd030192e268eeb15d473.tar.gz
rust-a8cfc83801301c2b4f0fd030192e268eeb15d473.zip
Auto merge of #123246 - Kobzol:tarball-reproducible, r=Mark-Simulacrum
Make source tarball generation more reproducible

This PR performs several changes to source tarball generation (`x dist rustc-src`) in order to make it more reproducible (in light of the recent "xz backdoor"...). I want to follow up on it with making a separate CI workflow for generating the tarball.

After this PR, running this locally produces identical checksums:
```bash
$ ./x dist rustc-src
$ sha256sum build/dist/rustc-1.79.0-src.tar.gz

$ ./x dist rustc-src
$ sha256sum build/dist/rustc-1.79.0-src.tar.gz
```

r? `@Mark-Simulacrum`
Diffstat (limited to 'src')
-rw-r--r--src/bootstrap/src/core/build_steps/dist.rs20
-rw-r--r--src/bootstrap/src/core/builder.rs55
-rw-r--r--src/tools/rust-installer/src/tarballer.rs12
3 files changed, 58 insertions, 29 deletions
diff --git a/src/bootstrap/src/core/build_steps/dist.rs b/src/bootstrap/src/core/build_steps/dist.rs
index d9c7032d0db..ecea62140a2 100644
--- a/src/bootstrap/src/core/build_steps/dist.rs
+++ b/src/bootstrap/src/core/build_steps/dist.rs
@@ -995,9 +995,9 @@ impl Step for PlainSourceTarball {
         if builder.rust_info().is_managed_git_subrepository()
             || builder.rust_info().is_from_tarball()
         {
-            if builder.rust_info().is_managed_git_subrepository() {
-                // Ensure we have the submodules checked out.
-                builder.update_submodule(Path::new("src/tools/cargo"));
+            // Ensure we have all submodules from src and other directories checked out.
+            for submodule in builder.get_all_submodules() {
+                builder.update_submodule(Path::new(submodule));
             }
 
             // Vendor all Cargo dependencies
@@ -1028,6 +1028,20 @@ impl Step for PlainSourceTarball {
             builder.create(&cargo_config_dir.join("config.toml"), &config);
         }
 
+        // Delete extraneous directories
+        // FIXME: if we're managed by git, we should probably instead ask git if the given path
+        // is managed by it?
+        for entry in walkdir::WalkDir::new(tarball.image_dir())
+            .follow_links(true)
+            .into_iter()
+            .filter_map(|e| e.ok())
+        {
+            if entry.path().is_dir() && entry.path().file_name() == Some(OsStr::new("__pycache__"))
+            {
+                t!(fs::remove_dir_all(entry.path()));
+            }
+        }
+
         tarball.bare()
     }
 }
diff --git a/src/bootstrap/src/core/builder.rs b/src/bootstrap/src/core/builder.rs
index 7f93fdc72ef..e9906ebd836 100644
--- a/src/bootstrap/src/core/builder.rs
+++ b/src/bootstrap/src/core/builder.rs
@@ -554,29 +554,7 @@ impl<'a> ShouldRun<'a> {
     ///
     /// [`path`]: ShouldRun::path
     pub fn paths(mut self, paths: &[&str]) -> Self {
-        static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();
-
-        let init_submodules_paths = |src: &PathBuf| {
-            let file = File::open(src.join(".gitmodules")).unwrap();
-
-            let mut submodules_paths = vec![];
-            for line in BufReader::new(file).lines() {
-                if let Ok(line) = line {
-                    let line = line.trim();
-
-                    if line.starts_with("path") {
-                        let actual_path =
-                            line.split(' ').last().expect("Couldn't get value of path");
-                        submodules_paths.push(actual_path.to_owned());
-                    }
-                }
-            }
-
-            submodules_paths
-        };
-
-        let submodules_paths =
-            SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.builder.src));
+        let submodules_paths = self.builder.get_all_submodules();
 
         self.paths.insert(PathSet::Set(
             paths
@@ -2151,6 +2129,37 @@ impl<'a> Builder<'a> {
         out
     }
 
+    /// Return paths of all submodules managed by git.
+    /// If the current checkout is not managed by git, returns an empty slice.
+    pub fn get_all_submodules(&self) -> &[String] {
+        if !self.rust_info().is_managed_git_subrepository() {
+            return &[];
+        }
+
+        static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();
+
+        let init_submodules_paths = |src: &PathBuf| {
+            let file = File::open(src.join(".gitmodules")).unwrap();
+
+            let mut submodules_paths = vec![];
+            for line in BufReader::new(file).lines() {
+                if let Ok(line) = line {
+                    let line = line.trim();
+
+                    if line.starts_with("path") {
+                        let actual_path =
+                            line.split(' ').last().expect("Couldn't get value of path");
+                        submodules_paths.push(actual_path.to_owned());
+                    }
+                }
+            }
+
+            submodules_paths
+        };
+
+        &SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.src))
+    }
+
     /// Ensure that a given step is built *only if it's supposed to be built by default*, returning
     /// its output. This will cache the step, so it's safe (and good!) to call this as often as
     /// needed to ensure that all dependencies are build.
diff --git a/src/tools/rust-installer/src/tarballer.rs b/src/tools/rust-installer/src/tarballer.rs
index e5a925b2cbf..2f093e7ad17 100644
--- a/src/tools/rust-installer/src/tarballer.rs
+++ b/src/tools/rust-installer/src/tarballer.rs
@@ -2,7 +2,7 @@ use anyhow::{bail, Context, Result};
 use std::fs::{read_link, symlink_metadata};
 use std::io::{BufWriter, Write};
 use std::path::Path;
-use tar::{Builder, Header};
+use tar::{Builder, Header, HeaderMode};
 use walkdir::WalkDir;
 
 use crate::{
@@ -53,14 +53,19 @@ impl Tarballer {
         // Sort files by their suffix, to group files with the same name from
         // different locations (likely identical) and files with the same
         // extension (likely containing similar data).
-        let (dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
+        // Sorting of file and directory paths also helps with the reproducibility
+        // of the resulting archive.
+        let (mut dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
             .context("failed to collect file paths")?;
+        dirs.sort();
         files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev()));
 
         // Write the tar into both encoded files. We write all directories
         // first, so files may be directly created. (See rust-lang/rustup.rs#1092.)
         let buf = BufWriter::with_capacity(1024 * 1024, encoder);
         let mut builder = Builder::new(buf);
+        // Make uid, gid and mtime deterministic to improve reproducibility
+        builder.mode(HeaderMode::Deterministic);
 
         let pool = rayon::ThreadPoolBuilder::new().num_threads(2).build().unwrap();
         pool.install(move || {
@@ -91,7 +96,8 @@ impl Tarballer {
 fn append_path<W: Write>(builder: &mut Builder<W>, src: &Path, path: &String) -> Result<()> {
     let stat = symlink_metadata(src)?;
     let mut header = Header::new_gnu();
-    header.set_metadata(&stat);
+    header.set_metadata_in_mode(&stat, HeaderMode::Deterministic);
+
     if stat.file_type().is_symlink() {
         let link = read_link(src)?;
         builder.append_link(&mut header, path, &link)?;