diff options
| author | Jakub Beránek <berykubik@gmail.com> | 2025-04-10 12:11:24 +0200 |
|---|---|---|
| committer | Jakub Beránek <berykubik@gmail.com> | 2025-04-20 09:35:42 +0200 |
| commit | 40058519bae2c4921e5a1cf5db42c0e9861b156f (patch) | |
| tree | 726e3e98997ddefec473924ccf46cd537e1f5597 | |
| parent | e9f3e3abda4ad922f09707702057bd13fc7bbd4a (diff) | |
| download | rust-40058519bae2c4921e5a1cf5db42c0e9861b156f.tar.gz rust-40058519bae2c4921e5a1cf5db42c0e9861b156f.zip | |
Use `--author-date-order` when looking up upstream commits to support subtree synces
| -rw-r--r-- | src/bootstrap/src/core/config/tests.rs | 71 | ||||
| -rw-r--r-- | src/bootstrap/src/utils/tests/git.rs | 10 | ||||
| -rw-r--r-- | src/build_helper/src/git.rs | 89 |
3 files changed, 143 insertions, 27 deletions
diff --git a/src/bootstrap/src/core/config/tests.rs b/src/bootstrap/src/core/config/tests.rs index 04e328c8b1b..96ac8a6d52f 100644 --- a/src/bootstrap/src/core/config/tests.rs +++ b/src/bootstrap/src/core/config/tests.rs @@ -858,6 +858,7 @@ fn test_local_changes_in_previous_upstream() { ctx.create_branch("feature"); ctx.modify("d"); ctx.commit(); + assert_eq!( ctx.check_modifications(&["a"], CiEnv::None), PathFreshness::LastModifiedUpstream { upstream: sha } @@ -914,3 +915,73 @@ fn test_local_changes_negative_path() { ); }); } + +#[test] +fn test_local_changes_subtree_that_used_bors() { + // Here we simulate a very specific situation related to subtrees. + // When you have merge commits locally, we should ignore them w.r.t. the artifact download + // logic. + // The upstream search code currently uses a simple heuristic: + // - Find commits by bors (or in general an author with the merge commit e-mail) + // - Find the newest such commit + // This should make it work even for subtrees that: + // - Used bors in the past (so they have bors merge commits in their history). + // - Use Josh to merge rustc into the subtree, in a way that the rustc history is the second + // parent, not the first one. + // + // In addition, when searching for modified files, we cannot simply start from HEAD, because + // in this situation git wouldn't find the right commit. + // + // This test checks that this specific scenario will resolve to the right rustc commit, both + // when finding a modified file and when finding a non-existent file (which essentially means + // that we just lookup the most recent upstream commit). + // + // See https://github.com/rust-lang/rust/issues/101907#issuecomment-2697671282 for more details. + git_test(|ctx| { + ctx.create_upstream_merge(&["a"]); + + // Start unrelated subtree history + ctx.run_git(&["switch", "--orphan", "subtree"]); + ctx.modify("bar"); + ctx.commit(); + // Now we need to emulate old bors commits in the subtree. + // Git only has a resolution of one second, which is a problem, since our git logic orders + // merge commits by their date. + // To avoid sleeping in the test, we modify the commit date to be forcefully in the past. + ctx.create_upstream_merge(&["subtree/a"]); + ctx.run_git(&["commit", "--amend", "--date", "Wed Feb 16 14:00 2011 +0100", "--no-edit"]); + + // Merge the subtree history into rustc + ctx.switch_to_branch("main"); + ctx.run_git(&["merge", "subtree", "--allow-unrelated"]); + + // Create a rustc commit that modifies a path that we're interested in (`x`) + let upstream_1 = ctx.create_upstream_merge(&["x"]); + // Create another bors commit + let upstream_2 = ctx.create_upstream_merge(&["a"]); + + ctx.switch_to_branch("subtree"); + + // Create a subtree branch + ctx.create_branch("subtree-pr"); + ctx.modify("baz"); + ctx.commit(); + // We merge rustc into this branch (simulating a "subtree pull") + ctx.merge("main", "committer <committer@foo.bar>"); + + // And then merge that branch into the subtree (simulating a situation right before a + // "subtree push") + ctx.switch_to_branch("subtree"); + ctx.merge("subtree-pr", "committer <committer@foo.bar>"); + + // And we want to check that we resolve to the right commits. + assert_eq!( + ctx.check_modifications(&["x"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: upstream_1 } + ); + assert_eq!( + ctx.check_modifications(&["nonexistent"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: upstream_2 } + ); + }); +} diff --git a/src/bootstrap/src/utils/tests/git.rs b/src/bootstrap/src/utils/tests/git.rs index 735dafb3f29..99e0793af46 100644 --- a/src/bootstrap/src/utils/tests/git.rs +++ b/src/bootstrap/src/utils/tests/git.rs @@ -53,12 +53,14 @@ impl GitCtx { modified_files: &[&str], author: &str, ) -> String { + let current_branch = self.get_current_branch(); + self.create_branch(branch); for file in modified_files { self.modify(file); } self.commit(); - self.switch_to_branch("main"); + self.switch_to_branch(¤t_branch); self.merge(branch, author); self.run_git(&["branch", "-d", branch]); self.get_current_commit() @@ -68,12 +70,16 @@ impl GitCtx { self.run_git(&["rev-parse", "HEAD"]) } + pub fn get_current_branch(&self) -> String { + self.run_git(&["rev-parse", "--abbrev-ref", "HEAD"]) + } + pub fn merge(&self, branch: &str, author: &str) { self.run_git(&["merge", "--no-commit", "--no-ff", branch]); self.run_git(&[ "commit".to_string(), "-m".to_string(), - "Merge of {branch}".to_string(), + format!("Merge of {branch} into {}", self.get_current_branch()), "--author".to_string(), author.to_string(), ]); diff --git a/src/build_helper/src/git.rs b/src/build_helper/src/git.rs index 8804f28987f..4b6cc08ef78 100644 --- a/src/build_helper/src/git.rs +++ b/src/build_helper/src/git.rs @@ -118,11 +118,8 @@ pub fn check_path_modifications( // modified the set of paths, to have an upstream reference that does not change // unnecessarily often. // However, if such commit is not found, we can fall back to the latest upstream commit - let upstream_with_modifications = get_latest_commit_that_modified_files( - git_dir, - target_paths, - config.git_merge_commit_email, - )?; + let upstream_with_modifications = + get_latest_upstream_commit_that_modified_files(git_dir, config, target_paths)?; match upstream_with_modifications { Some(sha) => Some(sha), None => get_closest_upstream_commit(Some(git_dir), config, ci_env)?, @@ -157,17 +154,38 @@ pub fn has_changed_since(git_dir: &Path, base: &str, paths: &[&str]) -> bool { !git.status().expect("cannot run git diff-index").success() } -/// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found. -/// If `author` is `Some`, only considers commits made by that author. -fn get_latest_commit_that_modified_files( +/// Returns the latest upstream commit that modified `target_paths`, or `None` if no such commit +/// was found. +fn get_latest_upstream_commit_that_modified_files( git_dir: &Path, + git_config: &GitConfig<'_>, target_paths: &[&str], - author: &str, ) -> Result<Option<String>, String> { let mut git = Command::new("git"); git.current_dir(git_dir); - git.args(["rev-list", "-n1", "--first-parent", "HEAD", "--author", author]); + // In theory, we could just use + // `git rev-list --first-parent HEAD --author=<merge-bot> -- <paths>` + // to find the latest upstream commit that modified `<paths>`. + // However, this does not work if you are in a subtree sync branch that contains merge commits + // which have the subtree history as their first parent, and the rustc history as second parent: + // `--first-parent` will just walk up the subtree history and never see a single rustc commit. + // We thus have to take a two-pronged approach. First lookup the most recent upstream commit + // by *date* (this should work even in a subtree sync branch), and then start the lookup for + // modified paths starting from that commit. + // + // See https://github.com/rust-lang/rust/pull/138591#discussion_r2037081858 for more details. + let upstream = get_closest_upstream_commit(Some(git_dir), git_config, CiEnv::None)? + .unwrap_or_else(|| "HEAD".to_string()); + + git.args([ + "rev-list", + "--first-parent", + "-n1", + &upstream, + "--author", + git_config.git_merge_commit_email, + ]); if !target_paths.is_empty() { git.arg("--").args(target_paths); @@ -176,37 +194,45 @@ fn get_latest_commit_that_modified_files( if output.is_empty() { Ok(None) } else { Ok(Some(output)) } } -/// Returns the most recent commit found in the local history that should definitely -/// exist upstream. We identify upstream commits by the e-mail of the commit author. +/// Returns the most recent (ordered chronologically) commit found in the local history that +/// should exist upstream. We identify upstream commits by the e-mail of the commit +/// author. /// -/// If `include_head` is false, the HEAD (current) commit will be ignored and only -/// its parents will be searched. This is useful for try/auto CI, where HEAD is -/// actually a commit made by bors, although it is not upstream yet. +/// If we are in CI, we simply return our first parent. fn get_closest_upstream_commit( git_dir: Option<&Path>, config: &GitConfig<'_>, env: CiEnv, ) -> Result<Option<String>, String> { + let base = match env { + CiEnv::None => "HEAD", + CiEnv::GitHubActions => { + // On CI, we should always have a non-upstream merge commit at the tip, + // and our first parent should be the most recently merged upstream commit. + // We thus simply return our first parent. + return resolve_commit_sha(git_dir, "HEAD^1").map(Some); + } + }; + let mut git = Command::new("git"); if let Some(git_dir) = git_dir { git.current_dir(git_dir); } - let base = match env { - CiEnv::None => "HEAD", - CiEnv::GitHubActions => { - // On CI, we always have a merge commit at the tip. - // We thus skip it, because although it can be created by - // `config.git_merge_commit_email`, it should not be upstream. - "HEAD^1" - } - }; + // We do not use `--first-parent`, because we can be in a situation (outside CI) where we have + // a subtree merge that actually has the main rustc history as its second parent. + // Using `--first-parent` would recurse into the history of the subtree, which could have some + // old bors commits that are not relevant to us. + // With `--author-date-order`, git recurses into all parent subtrees, and returns the most + // chronologically recent bors commit. + // Here we assume that none of our subtrees use bors anymore, and that all their old bors + // commits are way older than recent rustc bors commits! git.args([ "rev-list", + "--author-date-order", &format!("--author={}", config.git_merge_commit_email), "-n1", - "--first-parent", &base, ]); @@ -214,6 +240,19 @@ fn get_closest_upstream_commit( if output.is_empty() { Ok(None) } else { Ok(Some(output)) } } +/// Resolve the commit SHA of `commit_ref`. +fn resolve_commit_sha(git_dir: Option<&Path>, commit_ref: &str) -> Result<String, String> { + let mut git = Command::new("git"); + + if let Some(git_dir) = git_dir { + git.current_dir(git_dir); + } + + git.args(["rev-parse", commit_ref]); + + Ok(output_result(&mut git)?.trim().to_owned()) +} + /// Returns the files that have been modified in the current branch compared to the master branch. /// This includes committed changes, uncommitted changes, and changes that are not even staged. /// |
