about summary refs log tree commit diff
diff options
context:
space:
mode:
authorJakub Beránek <berykubik@gmail.com>2025-04-10 12:11:24 +0200
committerJakub Beránek <berykubik@gmail.com>2025-04-20 09:35:42 +0200
commit40058519bae2c4921e5a1cf5db42c0e9861b156f (patch)
tree726e3e98997ddefec473924ccf46cd537e1f5597
parente9f3e3abda4ad922f09707702057bd13fc7bbd4a (diff)
downloadrust-40058519bae2c4921e5a1cf5db42c0e9861b156f.tar.gz
rust-40058519bae2c4921e5a1cf5db42c0e9861b156f.zip
Use `--author-date-order` when looking up upstream commits to support subtree synces
-rw-r--r--src/bootstrap/src/core/config/tests.rs71
-rw-r--r--src/bootstrap/src/utils/tests/git.rs10
-rw-r--r--src/build_helper/src/git.rs89
3 files changed, 143 insertions, 27 deletions
diff --git a/src/bootstrap/src/core/config/tests.rs b/src/bootstrap/src/core/config/tests.rs
index 04e328c8b1b..96ac8a6d52f 100644
--- a/src/bootstrap/src/core/config/tests.rs
+++ b/src/bootstrap/src/core/config/tests.rs
@@ -858,6 +858,7 @@ fn test_local_changes_in_previous_upstream() {
         ctx.create_branch("feature");
         ctx.modify("d");
         ctx.commit();
+
         assert_eq!(
             ctx.check_modifications(&["a"], CiEnv::None),
             PathFreshness::LastModifiedUpstream { upstream: sha }
@@ -914,3 +915,73 @@ fn test_local_changes_negative_path() {
         );
     });
 }
+
+#[test]
+fn test_local_changes_subtree_that_used_bors() {
+    // Here we simulate a very specific situation related to subtrees.
+    // When you have merge commits locally, we should ignore them w.r.t. the artifact download
+    // logic.
+    // The upstream search code currently uses a simple heuristic:
+    // - Find commits by bors (or in general an author with the merge commit e-mail)
+    // - Find the newest such commit
+    // This should make it work even for subtrees that:
+    // - Used bors in the past (so they have bors merge commits in their history).
+    // - Use Josh to merge rustc into the subtree, in a way that the rustc history is the second
+    // parent, not the first one.
+    //
+    // In addition, when searching for modified files, we cannot simply start from HEAD, because
+    // in this situation git wouldn't find the right commit.
+    //
+    // This test checks that this specific scenario will resolve to the right rustc commit, both
+    // when finding a modified file and when finding a non-existent file (which essentially means
+    // that we just lookup the most recent upstream commit).
+    //
+    // See https://github.com/rust-lang/rust/issues/101907#issuecomment-2697671282 for more details.
+    git_test(|ctx| {
+        ctx.create_upstream_merge(&["a"]);
+
+        // Start unrelated subtree history
+        ctx.run_git(&["switch", "--orphan", "subtree"]);
+        ctx.modify("bar");
+        ctx.commit();
+        // Now we need to emulate old bors commits in the subtree.
+        // Git only has a resolution of one second, which is a problem, since our git logic orders
+        // merge commits by their date.
+        // To avoid sleeping in the test, we modify the commit date to be forcefully in the past.
+        ctx.create_upstream_merge(&["subtree/a"]);
+        ctx.run_git(&["commit", "--amend", "--date", "Wed Feb 16 14:00 2011 +0100", "--no-edit"]);
+
+        // Merge the subtree history into rustc
+        ctx.switch_to_branch("main");
+        ctx.run_git(&["merge", "subtree", "--allow-unrelated"]);
+
+        // Create a rustc commit that modifies a path that we're interested in (`x`)
+        let upstream_1 = ctx.create_upstream_merge(&["x"]);
+        // Create another bors commit
+        let upstream_2 = ctx.create_upstream_merge(&["a"]);
+
+        ctx.switch_to_branch("subtree");
+
+        // Create a subtree branch
+        ctx.create_branch("subtree-pr");
+        ctx.modify("baz");
+        ctx.commit();
+        // We merge rustc into this branch (simulating a "subtree pull")
+        ctx.merge("main", "committer <committer@foo.bar>");
+
+        // And then merge that branch into the subtree (simulating a situation right before a
+        // "subtree push")
+        ctx.switch_to_branch("subtree");
+        ctx.merge("subtree-pr", "committer <committer@foo.bar>");
+
+        // And we want to check that we resolve to the right commits.
+        assert_eq!(
+            ctx.check_modifications(&["x"], CiEnv::None),
+            PathFreshness::LastModifiedUpstream { upstream: upstream_1 }
+        );
+        assert_eq!(
+            ctx.check_modifications(&["nonexistent"], CiEnv::None),
+            PathFreshness::LastModifiedUpstream { upstream: upstream_2 }
+        );
+    });
+}
diff --git a/src/bootstrap/src/utils/tests/git.rs b/src/bootstrap/src/utils/tests/git.rs
index 735dafb3f29..99e0793af46 100644
--- a/src/bootstrap/src/utils/tests/git.rs
+++ b/src/bootstrap/src/utils/tests/git.rs
@@ -53,12 +53,14 @@ impl GitCtx {
         modified_files: &[&str],
         author: &str,
     ) -> String {
+        let current_branch = self.get_current_branch();
+
         self.create_branch(branch);
         for file in modified_files {
             self.modify(file);
         }
         self.commit();
-        self.switch_to_branch("main");
+        self.switch_to_branch(&current_branch);
         self.merge(branch, author);
         self.run_git(&["branch", "-d", branch]);
         self.get_current_commit()
@@ -68,12 +70,16 @@ impl GitCtx {
         self.run_git(&["rev-parse", "HEAD"])
     }
 
+    pub fn get_current_branch(&self) -> String {
+        self.run_git(&["rev-parse", "--abbrev-ref", "HEAD"])
+    }
+
     pub fn merge(&self, branch: &str, author: &str) {
         self.run_git(&["merge", "--no-commit", "--no-ff", branch]);
         self.run_git(&[
             "commit".to_string(),
             "-m".to_string(),
-            "Merge of {branch}".to_string(),
+            format!("Merge of {branch} into {}", self.get_current_branch()),
             "--author".to_string(),
             author.to_string(),
         ]);
diff --git a/src/build_helper/src/git.rs b/src/build_helper/src/git.rs
index 8804f28987f..4b6cc08ef78 100644
--- a/src/build_helper/src/git.rs
+++ b/src/build_helper/src/git.rs
@@ -118,11 +118,8 @@ pub fn check_path_modifications(
         // modified the set of paths, to have an upstream reference that does not change
         // unnecessarily often.
         // However, if such commit is not found, we can fall back to the latest upstream commit
-        let upstream_with_modifications = get_latest_commit_that_modified_files(
-            git_dir,
-            target_paths,
-            config.git_merge_commit_email,
-        )?;
+        let upstream_with_modifications =
+            get_latest_upstream_commit_that_modified_files(git_dir, config, target_paths)?;
         match upstream_with_modifications {
             Some(sha) => Some(sha),
             None => get_closest_upstream_commit(Some(git_dir), config, ci_env)?,
@@ -157,17 +154,38 @@ pub fn has_changed_since(git_dir: &Path, base: &str, paths: &[&str]) -> bool {
     !git.status().expect("cannot run git diff-index").success()
 }
 
-/// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found.
-/// If `author` is `Some`, only considers commits made by that author.
-fn get_latest_commit_that_modified_files(
+/// Returns the latest upstream commit that modified `target_paths`, or `None` if no such commit
+/// was found.
+fn get_latest_upstream_commit_that_modified_files(
     git_dir: &Path,
+    git_config: &GitConfig<'_>,
     target_paths: &[&str],
-    author: &str,
 ) -> Result<Option<String>, String> {
     let mut git = Command::new("git");
     git.current_dir(git_dir);
 
-    git.args(["rev-list", "-n1", "--first-parent", "HEAD", "--author", author]);
+    // In theory, we could just use
+    // `git rev-list --first-parent HEAD --author=<merge-bot> -- <paths>`
+    // to find the latest upstream commit that modified `<paths>`.
+    // However, this does not work if you are in a subtree sync branch that contains merge commits
+    // which have the subtree history as their first parent, and the rustc history as second parent:
+    // `--first-parent` will just walk up the subtree history and never see a single rustc commit.
+    // We thus have to take a two-pronged approach. First lookup the most recent upstream commit
+    // by *date* (this should work even in a subtree sync branch), and then start the lookup for
+    // modified paths starting from that commit.
+    //
+    // See https://github.com/rust-lang/rust/pull/138591#discussion_r2037081858 for more details.
+    let upstream = get_closest_upstream_commit(Some(git_dir), git_config, CiEnv::None)?
+        .unwrap_or_else(|| "HEAD".to_string());
+
+    git.args([
+        "rev-list",
+        "--first-parent",
+        "-n1",
+        &upstream,
+        "--author",
+        git_config.git_merge_commit_email,
+    ]);
 
     if !target_paths.is_empty() {
         git.arg("--").args(target_paths);
@@ -176,37 +194,45 @@ fn get_latest_commit_that_modified_files(
     if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
 }
 
-/// Returns the most recent commit found in the local history that should definitely
-/// exist upstream. We identify upstream commits by the e-mail of the commit author.
+/// Returns the most recent (ordered chronologically) commit found in the local history that
+/// should exist upstream. We identify upstream commits by the e-mail of the commit
+/// author.
 ///
-/// If `include_head` is false, the HEAD (current) commit will be ignored and only
-/// its parents will be searched. This is useful for try/auto CI, where HEAD is
-/// actually a commit made by bors, although it is not upstream yet.
+/// If we are in CI, we simply return our first parent.
 fn get_closest_upstream_commit(
     git_dir: Option<&Path>,
     config: &GitConfig<'_>,
     env: CiEnv,
 ) -> Result<Option<String>, String> {
+    let base = match env {
+        CiEnv::None => "HEAD",
+        CiEnv::GitHubActions => {
+            // On CI, we should always have a non-upstream merge commit at the tip,
+            // and our first parent should be the most recently merged upstream commit.
+            // We thus simply return our first parent.
+            return resolve_commit_sha(git_dir, "HEAD^1").map(Some);
+        }
+    };
+
     let mut git = Command::new("git");
 
     if let Some(git_dir) = git_dir {
         git.current_dir(git_dir);
     }
 
-    let base = match env {
-        CiEnv::None => "HEAD",
-        CiEnv::GitHubActions => {
-            // On CI, we always have a merge commit at the tip.
-            // We thus skip it, because although it can be created by
-            // `config.git_merge_commit_email`, it should not be upstream.
-            "HEAD^1"
-        }
-    };
+    // We do not use `--first-parent`, because we can be in a situation (outside CI) where we have
+    // a subtree merge that actually has the main rustc history as its second parent.
+    // Using `--first-parent` would recurse into the history of the subtree, which could have some
+    // old bors commits that are not relevant to us.
+    // With `--author-date-order`, git recurses into all parent subtrees, and returns the most
+    // chronologically recent bors commit.
+    // Here we assume that none of our subtrees use bors anymore, and that all their old bors
+    // commits are way older than recent rustc bors commits!
     git.args([
         "rev-list",
+        "--author-date-order",
         &format!("--author={}", config.git_merge_commit_email),
         "-n1",
-        "--first-parent",
         &base,
     ]);
 
@@ -214,6 +240,19 @@ fn get_closest_upstream_commit(
     if output.is_empty() { Ok(None) } else { Ok(Some(output)) }
 }
 
+/// Resolve the commit SHA of `commit_ref`.
+fn resolve_commit_sha(git_dir: Option<&Path>, commit_ref: &str) -> Result<String, String> {
+    let mut git = Command::new("git");
+
+    if let Some(git_dir) = git_dir {
+        git.current_dir(git_dir);
+    }
+
+    git.args(["rev-parse", commit_ref]);
+
+    Ok(output_result(&mut git)?.trim().to_owned())
+}
+
 /// Returns the files that have been modified in the current branch compared to the master branch.
 /// This includes committed changes, uncommitted changes, and changes that are not even staged.
 ///