diff options
| author | Jakub Beránek <berykubik@gmail.com> | 2025-03-16 13:34:02 +0100 |
|---|---|---|
| committer | Jakub Beránek <berykubik@gmail.com> | 2025-04-20 09:11:21 +0200 |
| commit | 082ab9c6d89071f2907cae69d45b06a4a6696004 (patch) | |
| tree | 9160d8718d6fb4faffd355a66885d440707532e5 | |
| parent | 49e5e4e3a5610c240a717cb99003a5d5d3356679 (diff) | |
| download | rust-082ab9c6d89071f2907cae69d45b06a4a6696004.tar.gz rust-082ab9c6d89071f2907cae69d45b06a4a6696004.zip | |
Implement a new unified function for figuring out how if a set of paths have been modified locally
Also adds several git tests to make sure that the behavior works in common cases (PR CI, auto CI, local usage).
| -rw-r--r-- | src/bootstrap/Cargo.lock | 10 | ||||
| -rw-r--r-- | src/build_helper/Cargo.toml | 3 | ||||
| -rw-r--r-- | src/build_helper/src/git.rs | 181 | ||||
| -rw-r--r-- | src/build_helper/src/git/tests.rs | 278 |
4 files changed, 465 insertions, 7 deletions
diff --git a/src/bootstrap/Cargo.lock b/src/bootstrap/Cargo.lock index 17ee4d610f9..beb9ea93989 100644 --- a/src/bootstrap/Cargo.lock +++ b/src/bootstrap/Cargo.lock @@ -225,12 +225,12 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -334,9 +334,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.167" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libredox" diff --git a/src/build_helper/Cargo.toml b/src/build_helper/Cargo.toml index 66894e1abc4..bdead0a36de 100644 --- a/src/build_helper/Cargo.toml +++ b/src/build_helper/Cargo.toml @@ -8,3 +8,6 @@ edition = "2021" [dependencies] serde = "1" serde_derive = "1" + +[dev-dependencies] +tempfile = "3.19" diff --git a/src/build_helper/src/git.rs b/src/build_helper/src/git.rs index f5347c30824..e1e7319b9aa 100644 --- a/src/build_helper/src/git.rs +++ b/src/build_helper/src/git.rs @@ -1,3 +1,6 @@ +#[cfg(test)] +mod tests; + use std::path::Path; use std::process::{Command, Stdio}; @@ -167,7 +170,181 @@ pub fn get_closest_merge_commit( Ok(output_result(&mut git)?.trim().to_owned()) } -/// Returns the files that have been modified in the current branch compared to the last merge. +/// Represents the result of checking whether a set of paths +/// have been modified locally or not. +#[derive(PartialEq, Debug)] +pub enum PathFreshness { + /// Artifacts should be downloaded from this upstream commit, + /// there are no local modifications. + LastModifiedUpstream { upstream: String }, + /// There are local modifications to a certain set of paths. + /// "Local" essentially means "not-upstream" here. + /// `upstream` is the latest upstream merge commit that made modifications to the + /// set of paths. + HasLocalModifications { upstream: String }, +} + +/// This function figures out if a set of paths was last modified upstream or +/// if there are some local modifications made to them. +/// +/// It can be used to figure out if we should download artifacts from CI or rather +/// build them locally. +/// +/// `target_paths` should be a non-empty slice of paths (git `pathspec`s) relative to `git_dir` +/// or the current working directory whose modifications would invalidate the artifact. +/// Each pathspec can also be a negative match, i.e. `:!foo`. This matches changes outside +/// the `foo` directory. +/// See https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec +/// for how git `pathspec` works. +/// +/// The function behaves differently in CI and outside CI. +/// +/// - Outside CI, we want to find out if `target_paths` were modified in some local commit on +/// top of the local master branch. +/// If not, we try to find the most recent upstream commit (which we assume are commits +/// made by bors) that modified `target_paths`. +/// We don't want to simply take the latest master commit to avoid changing the output of +/// this function frequently after rebasing on the latest master branch even if `target_paths` +/// were not modified upstream in the meantime. In that case we would be redownloading CI +/// artifacts unnecessarily. +/// +/// - In CI, we always fetch only a single parent merge commit, so we do not have access +/// to the full git history. Luckily, we only need to distinguish between two situations: +/// 1) The current PR made modifications to `target_paths`. +/// In that case, a build is typically necessary. +/// 2) The current PR did not make modifications to `target_paths`. +/// In that case we simply take the latest upstream commit, because on CI there is no need to avoid +/// redownloading. +pub fn check_path_modifications( + git_dir: Option<&Path>, + config: &GitConfig<'_>, + target_paths: &[&str], + ci_env: CiEnv, +) -> Result<PathFreshness, String> { + assert!(!target_paths.is_empty()); + for path in target_paths { + assert!(Path::new(path.trim_start_matches(":!")).is_relative()); + } + + let upstream_sha = if matches!(ci_env, CiEnv::GitHubActions) { + // Here the situation is different for PR CI and try/auto CI. + // For PR CI, we have the following history: + // <merge commit made by GitHub> + // 1-N PR commits + // upstream merge commit made by bors + // + // For try/auto CI, we have the following history: + // <**non-upstream** merge commit made by bors> + // 1-N PR commits + // upstream merge commit made by bors + // + // But on both cases, HEAD should be a merge commit. + // So if HEAD contains modifications of `target_paths`, our PR has modified + // them. If not, we can use the only available upstream commit for downloading + // artifacts. + + // Do not include HEAD, as it is never an upstream commit + get_closest_upstream_commit(git_dir, config, ci_env)? + } else { + // Outside CI, we have to find the most recent upstream commit that + // modified the set of paths, to have an upstream reference. + let upstream_sha = get_latest_commit_that_modified_files( + git_dir, + target_paths, + config.git_merge_commit_email, + )?; + let Some(upstream_sha) = upstream_sha else { + eprintln!("No upstream commit that modified paths {target_paths:?} found."); + eprintln!("Try to fetch more upstream history."); + return Err("No upstream commit with modifications found".to_string()); + }; + upstream_sha + }; + + if has_changed_since(git_dir, &upstream_sha, target_paths) { + Ok(PathFreshness::HasLocalModifications { upstream: upstream_sha }) + } else { + Ok(PathFreshness::LastModifiedUpstream { upstream: upstream_sha }) + } +} + +/// Returns true if any of the passed `paths` have changed since the `base` commit. +pub fn has_changed_since(git_dir: Option<&Path>, base: &str, paths: &[&str]) -> bool { + let mut git = Command::new("git"); + + if let Some(git_dir) = git_dir { + git.current_dir(git_dir); + } + + git.args(["diff-index", "--quiet", base, "--"]).args(paths); + + // Exit code 0 => no changes + // Exit code 1 => some changes were detected + !git.status().expect("cannot run git diff-index").success() +} + +/// Returns the latest commit that modified `target_paths`, or `None` if no such commit was found. +/// If `author` is `Some`, only considers commits made by that author. +fn get_latest_commit_that_modified_files( + git_dir: Option<&Path>, + target_paths: &[&str], + author: &str, +) -> Result<Option<String>, String> { + let mut git = Command::new("git"); + + if let Some(git_dir) = git_dir { + git.current_dir(git_dir); + } + + git.args(["rev-list", "-n1", "--first-parent", "HEAD", "--author", author]); + + if !target_paths.is_empty() { + git.arg("--").args(target_paths); + } + let output = output_result(&mut git)?.trim().to_owned(); + if output.is_empty() { Ok(None) } else { Ok(Some(output)) } +} + +/// Returns the most recent commit found in the local history that should definitely +/// exist upstream. We identify upstream commits by the e-mail of the commit author. +/// +/// If `include_head` is false, the HEAD (current) commit will be ignored and only +/// its parents will be searched. This is useful for try/auto CI, where HEAD is +/// actually a commit made by bors, although it is not upstream yet. +fn get_closest_upstream_commit( + git_dir: Option<&Path>, + config: &GitConfig<'_>, + env: CiEnv, +) -> Result<String, String> { + let mut git = Command::new("git"); + + if let Some(git_dir) = git_dir { + git.current_dir(git_dir); + } + + let base = match env { + CiEnv::None => "HEAD", + CiEnv::GitHubActions => { + // On CI, we always have a merge commit at the tip. + // We thus skip it, because although it can be creatd by + // `config.git_merge_commit_email`, it should not be upstream. + "HEAD^1" + } + }; + git.args([ + "rev-list", + &format!("--author={}", config.git_merge_commit_email), + "-n1", + "--first-parent", + &base, + ]); + + Ok(output_result(&mut git)?.trim().to_owned()) +} + +/// Returns the files that have been modified in the current branch compared to the master branch. +/// This includes committed changes, uncommitted changes, and changes that are not even staged. +/// /// The `extensions` parameter can be used to filter the files by their extension. /// Does not include removed files. /// If `extensions` is empty, all files will be returned. @@ -176,7 +353,7 @@ pub fn get_git_modified_files( git_dir: Option<&Path>, extensions: &[&str], ) -> Result<Vec<String>, String> { - let merge_base = get_closest_merge_commit(git_dir, config, &[])?; + let merge_base = get_closest_upstream_commit(git_dir, config, CiEnv::None)?; let mut git = Command::new("git"); if let Some(git_dir) = git_dir { diff --git a/src/build_helper/src/git/tests.rs b/src/build_helper/src/git/tests.rs new file mode 100644 index 00000000000..cdf50e14218 --- /dev/null +++ b/src/build_helper/src/git/tests.rs @@ -0,0 +1,278 @@ +use crate::ci::CiEnv; +use crate::git::{GitConfig, PathFreshness, check_path_modifications}; +use std::ffi::OsStr; +use std::fs::OpenOptions; +use std::process::Command; + +#[test] +fn test_pr_ci_unchanged_anywhere() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_nonupstream_merge(&["b"]); + let src = ctx.get_source(&["c"], CiEnv::GitHubActions); + assert_eq!(src, PathFreshness::LastModifiedUpstream { upstream: sha }); + }); +} + +#[test] +fn test_pr_ci_changed_in_pr() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_nonupstream_merge(&["b"]); + let src = ctx.get_source(&["b"], CiEnv::GitHubActions); + assert_eq!(src, PathFreshness::HasLocalModifications { upstream: sha }); + }); +} + +#[test] +fn test_auto_ci_unchanged_anywhere_select_parent() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_upstream_merge(&["b"]); + let src = ctx.get_source(&["c"], CiEnv::GitHubActions); + assert_eq!(src, PathFreshness::LastModifiedUpstream { upstream: sha }); + }); +} + +#[test] +fn test_auto_ci_changed_in_pr() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_upstream_merge(&["b", "c"]); + let src = ctx.get_source(&["c", "d"], CiEnv::GitHubActions); + assert_eq!(src, PathFreshness::HasLocalModifications { upstream: sha }); + }); +} + +#[test] +fn test_local_uncommitted_modifications() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_branch("feature"); + ctx.modify("a"); + + assert_eq!( + ctx.get_source(&["a", "d"], CiEnv::None), + PathFreshness::HasLocalModifications { upstream: sha } + ); + }); +} + +#[test] +fn test_local_committed_modifications() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_upstream_merge(&["b", "c"]); + ctx.create_branch("feature"); + ctx.modify("x"); + ctx.commit(); + ctx.modify("a"); + ctx.commit(); + + assert_eq!( + ctx.get_source(&["a", "d"], CiEnv::None), + PathFreshness::HasLocalModifications { upstream: sha } + ); + }); +} + +#[test] +fn test_local_committed_modifications_subdirectory() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a/b/c"]); + ctx.create_upstream_merge(&["b", "c"]); + ctx.create_branch("feature"); + ctx.modify("a/b/d"); + ctx.commit(); + + assert_eq!( + ctx.get_source(&["a/b"], CiEnv::None), + PathFreshness::HasLocalModifications { upstream: sha } + ); + }); +} + +#[test] +fn test_changes_in_head_upstream() { + git_test(|ctx| { + // We want to resolve to the upstream commit that made modifications to a, + // even if it is currently HEAD + let sha = ctx.create_upstream_merge(&["a"]); + assert_eq!( + ctx.get_source(&["a", "d"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: sha } + ); + }); +} + +#[test] +fn test_changes_in_previous_upstream() { + git_test(|ctx| { + // We want to resolve to this commit, which modified a + let sha = ctx.create_upstream_merge(&["a", "e"]); + // Not to this commit, which is the latest upstream commit + ctx.create_upstream_merge(&["b", "c"]); + ctx.create_branch("feature"); + ctx.modify("d"); + ctx.commit(); + assert_eq!( + ctx.get_source(&["a"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: sha } + ); + }); +} + +#[test] +fn test_changes_negative_path() { + git_test(|ctx| { + let upstream = ctx.create_upstream_merge(&["a"]); + ctx.create_branch("feature"); + ctx.modify("b"); + ctx.modify("d"); + ctx.commit(); + + assert_eq!( + ctx.get_source(&[":!b", ":!d"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: upstream.clone() } + ); + assert_eq!( + ctx.get_source(&[":!c"], CiEnv::None), + PathFreshness::HasLocalModifications { upstream: upstream.clone() } + ); + assert_eq!( + ctx.get_source(&[":!d", ":!x"], CiEnv::None), + PathFreshness::HasLocalModifications { upstream } + ); + }); +} + +struct GitCtx { + dir: tempfile::TempDir, + git_repo: String, + nightly_branch: String, + merge_bot_email: String, +} + +impl GitCtx { + fn new() -> Self { + let dir = tempfile::TempDir::new().unwrap(); + let ctx = Self { + dir, + git_repo: "rust-lang/rust".to_string(), + nightly_branch: "nightly".to_string(), + merge_bot_email: "Merge bot <merge-bot@rust-lang.org>".to_string(), + }; + ctx.run_git(&["init"]); + ctx.run_git(&["config", "user.name", "Tester"]); + ctx.run_git(&["config", "user.email", "tester@rust-lang.org"]); + ctx.modify("README.md"); + ctx.commit(); + ctx.run_git(&["branch", "-m", "main"]); + ctx + } + + fn get_source(&self, target_paths: &[&str], ci_env: CiEnv) -> PathFreshness { + check_path_modifications(Some(self.dir.path()), &self.git_config(), target_paths, ci_env) + .unwrap() + } + + fn create_upstream_merge(&self, modified_files: &[&str]) -> String { + self.create_branch_and_merge("previous-pr", modified_files, &self.merge_bot_email) + } + + fn create_nonupstream_merge(&self, modified_files: &[&str]) -> String { + self.create_branch_and_merge("pr", modified_files, "Tester <tester@rust-lang.org>") + } + + fn create_branch_and_merge( + &self, + branch: &str, + modified_files: &[&str], + author: &str, + ) -> String { + self.create_branch(branch); + for file in modified_files { + self.modify(file); + } + self.commit(); + self.switch_to_branch("main"); + self.merge(branch, author); + self.run_git(&["branch", "-d", branch]); + self.get_current_commit() + } + + fn get_current_commit(&self) -> String { + self.run_git(&["rev-parse", "HEAD"]) + } + + fn merge(&self, branch: &str, author: &str) { + self.run_git(&["merge", "--no-commit", "--no-ff", branch]); + self.run_git(&[ + "commit".to_string(), + "-m".to_string(), + "Merge of {branch}".to_string(), + "--author".to_string(), + author.to_string(), + ]); + } + + fn modify(&self, path: &str) { + use std::io::Write; + + let path = self.dir.path().join(path); + std::fs::create_dir_all(&path.parent().unwrap()).unwrap(); + + let mut file = OpenOptions::new().create(true).append(true).open(path).unwrap(); + writeln!(file, "line").unwrap(); + } + + fn commit(&self) -> String { + self.run_git(&["add", "."]); + self.run_git(&["commit", "-m", "commit message"]); + self.get_current_commit() + } + + fn switch_to_branch(&self, name: &str) { + self.run_git(&["switch", name]); + } + + /// Creates a branch and switches to it. + fn create_branch(&self, name: &str) { + self.run_git(&["checkout", "-b", name]); + } + + fn run_git<S: AsRef<OsStr>>(&self, args: &[S]) -> String { + let mut cmd = self.git_cmd(); + cmd.args(args); + eprintln!("Running {cmd:?}"); + let output = cmd.output().unwrap(); + let stdout = String::from_utf8(output.stdout).unwrap().trim().to_string(); + let stderr = String::from_utf8(output.stderr).unwrap().trim().to_string(); + if !output.status.success() { + panic!("Git command `{cmd:?}` failed\nStdout\n{stdout}\nStderr\n{stderr}"); + } + stdout + } + + fn git_cmd(&self) -> Command { + let mut cmd = Command::new("git"); + cmd.current_dir(&self.dir); + cmd + } + + fn git_config(&self) -> GitConfig<'_> { + GitConfig { + git_repository: &self.git_repo, + nightly_branch: &self.nightly_branch, + git_merge_commit_email: &self.merge_bot_email, + } + } +} + +fn git_test<F>(test_fn: F) +where + F: FnOnce(&GitCtx), +{ + let ctx = GitCtx::new(); + test_fn(&ctx); +} |
