about summary refs log tree commit diff
diff options
context:
space:
mode:
authorJakub Beránek <berykubik@gmail.com>2025-03-16 13:34:02 +0100
committerJakub Beránek <berykubik@gmail.com>2025-04-20 09:11:21 +0200
commit082ab9c6d89071f2907cae69d45b06a4a6696004 (patch)
tree9160d8718d6fb4faffd355a66885d440707532e5
parent49e5e4e3a5610c240a717cb99003a5d5d3356679 (diff)
downloadrust-082ab9c6d89071f2907cae69d45b06a4a6696004.tar.gz
rust-082ab9c6d89071f2907cae69d45b06a4a6696004.zip
Implement a new unified function for figuring out how if a set of paths have been modified locally
Also adds several git tests to make sure that the behavior works in common cases (PR CI, auto CI, local usage).
-rw-r--r--src/bootstrap/Cargo.lock10
-rw-r--r--src/build_helper/Cargo.toml3
-rw-r--r--src/build_helper/src/git.rs181
-rw-r--r--src/build_helper/src/git/tests.rs278
4 files changed, 465 insertions, 7 deletions
diff --git a/src/bootstrap/Cargo.lock b/src/bootstrap/Cargo.lock
index 17ee4d610f9..beb9ea93989 100644
--- a/src/bootstrap/Cargo.lock
+++ b/src/bootstrap/Cargo.lock
@@ -225,12 +225,12 @@ dependencies = [
 
 [[package]]
 name = "errno"
-version = "0.3.9"
+version = "0.3.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
+checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
 dependencies = [
  "libc",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -334,9 +334,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
 
 [[package]]
 name = "libc"
-version = "0.2.167"
+version = "0.2.171"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc"
+checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
 
 [[package]]
 name = "libredox"
diff --git a/src/build_helper/Cargo.toml b/src/build_helper/Cargo.toml
index 66894e1abc4..bdead0a36de 100644
--- a/src/build_helper/Cargo.toml
+++ b/src/build_helper/Cargo.toml
@@ -8,3 +8,6 @@ edition = "2021"
 [dependencies]
 serde = "1"
 serde_derive = "1"
+
+[dev-dependencies]
+tempfile = "3.19"
diff --git a/src/build_helper/src/git.rs b/src/build_helper/src/git.rs
index f5347c30824..e1e7319b9aa 100644
--- a/src/build_helper/src/git.rs
+++ b/src/build_helper/src/git.rs
@@ -1,3 +1,6 @@
+#[cfg(test)]
+mod tests;
+
 use std::path::Path;
 use std::process::{Command, Stdio};
 
@@ -167,7 +170,181 @@ pub fn get_closest_merge_commit(
     Ok(output_result(&mut git)?.trim().to_owned())
 }
 
-/// Returns the files that have been modified in the current branch compared to the last merge.
+/// Represents the result of checking whether a set of paths
+/// have been modified locally or not.
+#[derive(PartialEq, Debug)]
+pub enum PathFreshness {
+    /// Artifacts should be downloaded from this upstream commit,
+    /// there are no local modifications.
+    LastModifiedUpstream { upstream: String },
+    /// There are local modifications to a certain set of paths.
+    /// "Local" essentially means "not-upstream" here.
+    /// `upstream` is the latest upstream merge commit that made modifications to the
+    /// set of paths.
+    HasLocalModifications { upstream: String },
+}
+
+/// This function figures out if a set of paths was last modified upstream or
+/// if there are some local modifications made to them.
+///
+/// It can be used to figure out if we should download artifacts from CI or rather
+/// build them locally.
+///
+/// `target_paths` should be a non-empty slice of paths (git `pathspec`s) relative to `git_dir`
+/// or the current working directory whose modifications would invalidate the artifact.
+/// Each pathspec can also be a negative match, i.e. `:!foo`. This matches changes outside
+/// the `foo` directory.
+/// See https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec
+/// for how git `pathspec` works.
+///
+/// The function behaves differently in CI and outside CI.
+///
+/// - Outside CI, we want to find out if `target_paths` were modified in some local commit on
+/// top of the local master branch.
+/// If not, we try to find the most recent upstream commit (which we assume are commits
+/// made by bors) that modified `target_paths`.
+/// We don't want to simply take the latest master commit to avoid changing the output of
+/// this function frequently after rebasing on the latest master branch even if `target_paths`
+/// were not modified upstream in the meantime. In that case we would be redownloading CI
+/// artifacts unnecessarily.
+///
+/// - In CI, we always fetch only a single parent merge commit, so we do not have access
+/// to the full git history. Luckily, we only need to distinguish between two situations:
+/// 1) The current PR made modifications to `target_paths`.
+/// In that case, a build is typically necessary.
+/// 2) The current PR did not make modifications to `target_paths`.
+/// In that case we simply take the latest upstream commit, because on CI there is no need to avoid
+/// redownloading.
+pub fn check_path_modifications(
+    git_dir: Option<&Path>,
+    config: &GitConfig<'_>,
+    target_paths: &[&str],
+    ci_env: CiEnv,
+) -> Result<PathFreshness, String> {
+    assert!(!target_paths.is_empty());
+    for path in target_paths {
+        assert!(Path::new(path.trim_start_matches(":!")).is_relative());
+    }
+
+    let upstream_sha = if matches!(ci_env, CiEnv::GitHubActions) {
+        // Here the situation is different for PR CI and try/auto CI.
+        // For PR CI, we have the following history:
+        // <merge commit made by GitHub>
+        // 1-N PR commits
+        // upstream merge commit made by bors
+        //
+        // For try/auto CI, we have the following history:
+        // <**non-upstream** merge commit made by bors>
+        // 1-N PR commits
+        // upstream merge commit made by bors
+        //
+        // But on both cases, HEAD should be a merge commit.
+        // So if HEAD contains modifications of `target_paths`, our PR has modified
+        // them. If not, we can use the only available upstream commit for downloading
+        // artifacts.
+
+        // Do not include HEAD, as it is never an upstream commit
+        get_closest_upstream_commit(git_dir, config, ci_env)?
+    } else {
+        // Outside CI, we have to find the most recent upstream commit that
+        // modified the set of paths, to have an upstream reference.
+        let upstream_sha = get_latest_commit_that_modified_files(
+            git_dir,
+            target_paths,
+            config.git_merge_commit_email,
+        )?;
+        let Some(upstream_sha) = upstream_sha else {
+            eprintln!("No upstream commit that modified paths {target_paths:?} found.");
+            eprintln!("Try to fetch more upstream history.");
+            return Err("No upstream commit with modifications found".to_string());
+        };
+        upstream_sha
+    };
+
+    if has_changed_since(git_dir, &upstream_sha, target_paths) {
+        Ok(PathFreshness::HasLocalModifications { upstream: upstream_sha })
+    } else {
+        Ok(PathFreshness::LastModifiedUpstream { upstream: upstream_sha })
+    }
+}
+
+/// Returns true if any of the passed `paths` have changed since the `base` commit.
+pub fn has_changed_since(git_dir: Option<&Path>, base: &str, paths: &[&str]) -> bool {
+    let mut git = Command::new("git");
+
+    if let Some(git_dir) = git_dir {
+        git.current_dir(git_dir);
+    }
+
+    git.args(["diff-index", "--quiet", base, "--"]).args(paths);
+
+    // Exit code 0 => no changes
+    // Exit code 1 => some changes were detected
+    !git.status().expect("cannot run git diff-index").success()
+}
+
+/// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found.
+/// If `author` is `Some`, only considers commits made by that author.
+fn get_latest_commit_that_modified_files(
+    git_dir: Option<&Path>,
+    target_paths: &[&str],
+    author: &str,
+) -> Result<Option<String>, String> {
+    let mut git = Command::new("git");
+
+    if let Some(git_dir) = git_dir {
+        git.current_dir(git_dir);
+    }
+
+    git.args(["rev-list", "-n1", "--first-parent", "HEAD", "--author", author]);
+
+    if !target_paths.is_empty() {
+        git.arg("--").args(target_paths);
+    }
+    let output = output_result(&mut git)?.trim().to_owned();
+    if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
+}
+
+/// Returns the most recent commit found in the local history that should definitely
+/// exist upstream. We identify upstream commits by the e-mail of the commit author.
+///
+/// If `include_head` is false, the HEAD (current) commit will be ignored and only
+/// its parents will be searched. This is useful for try/auto CI, where HEAD is
+/// actually a commit made by bors, although it is not upstream yet.
+fn get_closest_upstream_commit(
+    git_dir: Option<&Path>,
+    config: &GitConfig<'_>,
+    env: CiEnv,
+) -> Result<String, String> {
+    let mut git = Command::new("git");
+
+    if let Some(git_dir) = git_dir {
+        git.current_dir(git_dir);
+    }
+
+    let base = match env {
+        CiEnv::None => "HEAD",
+        CiEnv::GitHubActions => {
+            // On CI, we always have a merge commit at the tip.
+            // We thus skip it, because although it can be creatd by
+            // `config.git_merge_commit_email`, it should not be upstream.
+            "HEAD^1"
+        }
+    };
+    git.args([
+        "rev-list",
+        &format!("--author={}", config.git_merge_commit_email),
+        "-n1",
+        "--first-parent",
+        &base,
+    ]);
+
+    Ok(output_result(&mut git)?.trim().to_owned())
+}
+
+/// Returns the files that have been modified in the current branch compared to the master branch.
+/// This includes committed changes, uncommitted changes, and changes that are not even staged.
+///
 /// The `extensions` parameter can be used to filter the files by their extension.
 /// Does not include removed files.
 /// If `extensions` is empty, all files will be returned.
@@ -176,7 +353,7 @@ pub fn get_git_modified_files(
     git_dir: Option<&Path>,
     extensions: &[&str],
 ) -> Result<Vec<String>, String> {
-    let merge_base = get_closest_merge_commit(git_dir, config, &[])?;
+    let merge_base = get_closest_upstream_commit(git_dir, config, CiEnv::None)?;
 
     let mut git = Command::new("git");
     if let Some(git_dir) = git_dir {
diff --git a/src/build_helper/src/git/tests.rs b/src/build_helper/src/git/tests.rs
new file mode 100644
index 00000000000..cdf50e14218
--- /dev/null
+++ b/src/build_helper/src/git/tests.rs
@@ -0,0 +1,278 @@
+use crate::ci::CiEnv;
+use crate::git::{GitConfig, PathFreshness, check_path_modifications};
+use std::ffi::OsStr;
+use std::fs::OpenOptions;
+use std::process::Command;
+
+#[test]
+fn test_pr_ci_unchanged_anywhere() {
+    git_test(|ctx| {
+        let sha = ctx.create_upstream_merge(&["a"]);
+        ctx.create_nonupstream_merge(&["b"]);
+        let src = ctx.get_source(&["c"], CiEnv::GitHubActions);
+        assert_eq!(src, PathFreshness::LastModifiedUpstream { upstream: sha });
+    });
+}
+
+#[test]
+fn test_pr_ci_changed_in_pr() {
+    git_test(|ctx| {
+        let sha = ctx.create_upstream_merge(&["a"]);
+        ctx.create_nonupstream_merge(&["b"]);
+        let src = ctx.get_source(&["b"], CiEnv::GitHubActions);
+        assert_eq!(src, PathFreshness::HasLocalModifications { upstream: sha });
+    });
+}
+
+#[test]
+fn test_auto_ci_unchanged_anywhere_select_parent() {
+    git_test(|ctx| {
+        let sha = ctx.create_upstream_merge(&["a"]);
+        ctx.create_upstream_merge(&["b"]);
+        let src = ctx.get_source(&["c"], CiEnv::GitHubActions);
+        assert_eq!(src, PathFreshness::LastModifiedUpstream { upstream: sha });
+    });
+}
+
+#[test]
+fn test_auto_ci_changed_in_pr() {
+    git_test(|ctx| {
+        let sha = ctx.create_upstream_merge(&["a"]);
+        ctx.create_upstream_merge(&["b", "c"]);
+        let src = ctx.get_source(&["c", "d"], CiEnv::GitHubActions);
+        assert_eq!(src, PathFreshness::HasLocalModifications { upstream: sha });
+    });
+}
+
+#[test]
+fn test_local_uncommitted_modifications() {
+    git_test(|ctx| {
+        let sha = ctx.create_upstream_merge(&["a"]);
+        ctx.create_branch("feature");
+        ctx.modify("a");
+
+        assert_eq!(
+            ctx.get_source(&["a", "d"], CiEnv::None),
+            PathFreshness::HasLocalModifications { upstream: sha }
+        );
+    });
+}
+
+#[test]
+fn test_local_committed_modifications() {
+    git_test(|ctx| {
+        let sha = ctx.create_upstream_merge(&["a"]);
+        ctx.create_upstream_merge(&["b", "c"]);
+        ctx.create_branch("feature");
+        ctx.modify("x");
+        ctx.commit();
+        ctx.modify("a");
+        ctx.commit();
+
+        assert_eq!(
+            ctx.get_source(&["a", "d"], CiEnv::None),
+            PathFreshness::HasLocalModifications { upstream: sha }
+        );
+    });
+}
+
+#[test]
+fn test_local_committed_modifications_subdirectory() {
+    git_test(|ctx| {
+        let sha = ctx.create_upstream_merge(&["a/b/c"]);
+        ctx.create_upstream_merge(&["b", "c"]);
+        ctx.create_branch("feature");
+        ctx.modify("a/b/d");
+        ctx.commit();
+
+        assert_eq!(
+            ctx.get_source(&["a/b"], CiEnv::None),
+            PathFreshness::HasLocalModifications { upstream: sha }
+        );
+    });
+}
+
+#[test]
+fn test_changes_in_head_upstream() {
+    git_test(|ctx| {
+        // We want to resolve to the upstream commit that made modifications to a,
+        // even if it is currently HEAD
+        let sha = ctx.create_upstream_merge(&["a"]);
+        assert_eq!(
+            ctx.get_source(&["a", "d"], CiEnv::None),
+            PathFreshness::LastModifiedUpstream { upstream: sha }
+        );
+    });
+}
+
+#[test]
+fn test_changes_in_previous_upstream() {
+    git_test(|ctx| {
+        // We want to resolve to this commit, which modified a
+        let sha = ctx.create_upstream_merge(&["a", "e"]);
+        // Not to this commit, which is the latest upstream commit
+        ctx.create_upstream_merge(&["b", "c"]);
+        ctx.create_branch("feature");
+        ctx.modify("d");
+        ctx.commit();
+        assert_eq!(
+            ctx.get_source(&["a"], CiEnv::None),
+            PathFreshness::LastModifiedUpstream { upstream: sha }
+        );
+    });
+}
+
+#[test]
+fn test_changes_negative_path() {
+    git_test(|ctx| {
+        let upstream = ctx.create_upstream_merge(&["a"]);
+        ctx.create_branch("feature");
+        ctx.modify("b");
+        ctx.modify("d");
+        ctx.commit();
+
+        assert_eq!(
+            ctx.get_source(&[":!b", ":!d"], CiEnv::None),
+            PathFreshness::LastModifiedUpstream { upstream: upstream.clone() }
+        );
+        assert_eq!(
+            ctx.get_source(&[":!c"], CiEnv::None),
+            PathFreshness::HasLocalModifications { upstream: upstream.clone() }
+        );
+        assert_eq!(
+            ctx.get_source(&[":!d", ":!x"], CiEnv::None),
+            PathFreshness::HasLocalModifications { upstream }
+        );
+    });
+}
+
+struct GitCtx {
+    dir: tempfile::TempDir,
+    git_repo: String,
+    nightly_branch: String,
+    merge_bot_email: String,
+}
+
+impl GitCtx {
+    fn new() -> Self {
+        let dir = tempfile::TempDir::new().unwrap();
+        let ctx = Self {
+            dir,
+            git_repo: "rust-lang/rust".to_string(),
+            nightly_branch: "nightly".to_string(),
+            merge_bot_email: "Merge bot <merge-bot@rust-lang.org>".to_string(),
+        };
+        ctx.run_git(&["init"]);
+        ctx.run_git(&["config", "user.name", "Tester"]);
+        ctx.run_git(&["config", "user.email", "tester@rust-lang.org"]);
+        ctx.modify("README.md");
+        ctx.commit();
+        ctx.run_git(&["branch", "-m", "main"]);
+        ctx
+    }
+
+    fn get_source(&self, target_paths: &[&str], ci_env: CiEnv) -> PathFreshness {
+        check_path_modifications(Some(self.dir.path()), &self.git_config(), target_paths, ci_env)
+            .unwrap()
+    }
+
+    fn create_upstream_merge(&self, modified_files: &[&str]) -> String {
+        self.create_branch_and_merge("previous-pr", modified_files, &self.merge_bot_email)
+    }
+
+    fn create_nonupstream_merge(&self, modified_files: &[&str]) -> String {
+        self.create_branch_and_merge("pr", modified_files, "Tester <tester@rust-lang.org>")
+    }
+
+    fn create_branch_and_merge(
+        &self,
+        branch: &str,
+        modified_files: &[&str],
+        author: &str,
+    ) -> String {
+        self.create_branch(branch);
+        for file in modified_files {
+            self.modify(file);
+        }
+        self.commit();
+        self.switch_to_branch("main");
+        self.merge(branch, author);
+        self.run_git(&["branch", "-d", branch]);
+        self.get_current_commit()
+    }
+
+    fn get_current_commit(&self) -> String {
+        self.run_git(&["rev-parse", "HEAD"])
+    }
+
+    fn merge(&self, branch: &str, author: &str) {
+        self.run_git(&["merge", "--no-commit", "--no-ff", branch]);
+        self.run_git(&[
+            "commit".to_string(),
+            "-m".to_string(),
+            "Merge of {branch}".to_string(),
+            "--author".to_string(),
+            author.to_string(),
+        ]);
+    }
+
+    fn modify(&self, path: &str) {
+        use std::io::Write;
+
+        let path = self.dir.path().join(path);
+        std::fs::create_dir_all(&path.parent().unwrap()).unwrap();
+
+        let mut file = OpenOptions::new().create(true).append(true).open(path).unwrap();
+        writeln!(file, "line").unwrap();
+    }
+
+    fn commit(&self) -> String {
+        self.run_git(&["add", "."]);
+        self.run_git(&["commit", "-m", "commit message"]);
+        self.get_current_commit()
+    }
+
+    fn switch_to_branch(&self, name: &str) {
+        self.run_git(&["switch", name]);
+    }
+
+    /// Creates a branch and switches to it.
+    fn create_branch(&self, name: &str) {
+        self.run_git(&["checkout", "-b", name]);
+    }
+
+    fn run_git<S: AsRef<OsStr>>(&self, args: &[S]) -> String {
+        let mut cmd = self.git_cmd();
+        cmd.args(args);
+        eprintln!("Running {cmd:?}");
+        let output = cmd.output().unwrap();
+        let stdout = String::from_utf8(output.stdout).unwrap().trim().to_string();
+        let stderr = String::from_utf8(output.stderr).unwrap().trim().to_string();
+        if !output.status.success() {
+            panic!("Git command `{cmd:?}` failed\nStdout\n{stdout}\nStderr\n{stderr}");
+        }
+        stdout
+    }
+
+    fn git_cmd(&self) -> Command {
+        let mut cmd = Command::new("git");
+        cmd.current_dir(&self.dir);
+        cmd
+    }
+
+    fn git_config(&self) -> GitConfig<'_> {
+        GitConfig {
+            git_repository: &self.git_repo,
+            nightly_branch: &self.nightly_branch,
+            git_merge_commit_email: &self.merge_bot_email,
+        }
+    }
+}
+
+fn git_test<F>(test_fn: F)
+where
+    F: FnOnce(&GitCtx),
+{
+    let ctx = GitCtx::new();
+    test_fn(&ctx);
+}