about summary refs log tree commit diff
path: root/src/build_helper/src/git.rs
blob: 9d1195aadf848d213a00370ca227bf33ad3ca45b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
use std::path::Path;
use std::process::{Command, Stdio};

use crate::ci::CiEnv;

/// Git-related settings consumed by the helpers in this module.
#[derive(Debug)]
pub struct GitConfig<'a> {
    /// Name of the nightly branch.
    /// NOTE(review): no code visible in this file reads this field; presumably it is used by
    /// callers elsewhere — confirm.
    pub nightly_branch: &'a str,
    /// E-mail address handed to `git rev-list --author` in order to recognize upstream
    /// merge commits.
    pub git_merge_commit_email: &'a str,
}

/// Executes `cmd` and hands back everything it wrote to stdout as a `String`.
///
/// The child's stderr is inherited from the current process instead of being captured.
///
/// # Errors
///
/// Returns a human-readable message when the command cannot be started, when it exits
/// with a non-success status, or when its output is not valid UTF-8.
pub fn output_result(cmd: &mut Command) -> Result<String, String> {
    let spawned = cmd.stderr(Stdio::inherit()).output();
    let output = spawned.map_err(|e| format!("failed to run command: {:?}: {}", cmd, e))?;

    // Happy path first: a successful exit just needs its stdout decoded.
    if output.status.success() {
        return String::from_utf8(output.stdout).map_err(|err| format!("{err:?}"));
    }

    let captured_stderr = String::from_utf8(output.stderr).map_err(|err| format!("{err:?}"))?;
    Err(format!(
        "command did not execute successfully: {:?}\n\
         expected success, got: {}\n{}",
        cmd, output.status, captured_stderr
    ))
}

/// Represents the result of checking whether a set of paths
/// have been modified locally or not.
///
/// This is the success value returned by `check_path_modifications`.
#[derive(PartialEq, Debug, Clone)]
pub enum PathFreshness {
    /// Artifacts should be downloaded from this upstream commit,
    /// there are no local modifications.
    /// Returned when `git diff-index` reports no difference in the target paths
    /// relative to `upstream`.
    LastModifiedUpstream { upstream: String },
    /// There are local modifications to a certain set of paths.
    /// "Local" essentially means "not-upstream" here.
    /// `upstream` is the latest upstream merge commit that made modifications to the
    /// set of paths.
    /// Returned when `git diff-index` does not exit successfully for the target paths
    /// relative to `upstream`.
    HasLocalModifications { upstream: String },
    /// No upstream commit was found.
    /// This should not happen in most reasonable circumstances, but one never knows.
    MissingUpstream,
}

/// Determines whether `target_paths` were last touched by an upstream commit or whether they
/// carry local modifications.
///
/// The answer tells the caller whether artifacts can be downloaded from CI or rather have to
/// be built locally.
///
/// At least one upstream bors merge commit is assumed to be present in the local git history.
///
/// `target_paths` must be a non-empty slice of git `pathspec`s, relative to `git_dir`; a change
/// to any of them invalidates the artifact. A pathspec may also be negated: `:!foo` matches
/// changes outside of the `foo` directory.
/// See <https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefpathspecapathspec>
/// for how git `pathspec`s work.
///
/// The behavior differs between CI and non-CI environments:
///
/// - Outside CI, the question is whether `target_paths` were modified in some local commit on
///   top of the latest upstream commit available in the local history. The reference commit is
///   the most recent upstream commit (assumed to be made by bors) that modified `target_paths`.
///   Simply taking the latest master commit is avoided on purpose: the result would then change
///   after every rebase on master, even when `target_paths` were not touched upstream in the
///   meantime, and CI artifacts would be redownloaded for no reason.
///
/// - In CI, a shallow clone of depth 2 is used, so only a single parent commit (the most recent
///   bors merge commit) is fetched and the full history is unavailable. Only two situations
///   have to be told apart:
///   1) The current PR modified `target_paths`; a build is then typically necessary.
///   2) The current PR did not modify `target_paths`; the latest upstream commit is used,
///      because redownloading is not a concern on CI.
///
/// # Errors
///
/// Propagates the error message of any failing git lookup.
///
/// # Panics
///
/// Panics if `target_paths` is empty or if a path is not relative once its leading `:!`
/// is stripped. The CI branch additionally `expect`s that an upstream commit is found.
pub fn check_path_modifications(
    git_dir: &Path,
    config: &GitConfig<'_>,
    target_paths: &[&str],
    ci_env: CiEnv,
) -> Result<PathFreshness, String> {
    assert!(!target_paths.is_empty());
    for path in target_paths {
        assert!(Path::new(path.trim_start_matches(":!")).is_relative());
    }

    let on_ci = matches!(ci_env, CiEnv::GitHubActions);

    let reference_commit = if !on_ci {
        // Locally we prefer the most recent upstream commit that touched the paths, so that
        // the reference does not move around more often than necessary.
        // When there is no such commit, the latest upstream commit serves as a fallback.
        match get_latest_upstream_commit_that_modified_files(git_dir, config, target_paths)? {
            found @ Some(_) => found,
            None => get_closest_upstream_commit(Some(git_dir), config, ci_env)?,
        }
    } else {
        // PR CI and try/auto CI produce slightly different histories.
        //
        // PR CI:
        //   <merge commit made by GitHub>
        //   1-N PR commits
        //   upstream merge commit made by bors
        //
        // try/auto CI:
        //   <**non-upstream** merge commit made by bors>
        //   1-N PR commits
        //   upstream merge commit made by bors
        //
        // Either way HEAD should be a merge commit. If HEAD modifies `target_paths`, the PR
        // modified them; otherwise the only available upstream commit can be used for
        // downloading artifacts.
        //
        // HEAD itself is excluded because it is never an upstream commit.
        // Not finding an upstream commit on CI means that something is seriously wrong.
        let parent = get_closest_upstream_commit(Some(git_dir), config, ci_env)?;
        Some(parent.expect("No upstream commit was found on CI"))
    };

    let upstream = match reference_commit {
        Some(sha) => sha,
        None => return Ok(PathFreshness::MissingUpstream),
    };

    // Locally, the real question is whether anything changed since the latest upstream commit.
    // That should be equivalent to asking whether anything changed since the latest upstream
    // commit *that modified `target_paths`*, and asking it this way saves one git invocation.
    let freshness = if has_changed_since(git_dir, &upstream, target_paths) {
        PathFreshness::HasLocalModifications { upstream }
    } else {
        PathFreshness::LastModifiedUpstream { upstream }
    };
    Ok(freshness)
}

/// Reports whether `git diff-index --quiet` sees a difference in any of `paths` relative to
/// the `base` commit.
///
/// The command is run inside `git_dir`. Every unsuccessful exit status counts as "changed".
///
/// # Panics
///
/// Panics if the `git` process cannot be run.
pub fn has_changed_since(git_dir: &Path, base: &str, paths: &[&str]) -> bool {
    let status = Command::new("git")
        .current_dir(git_dir)
        .arg("diff-index")
        .arg("--quiet")
        .arg(base)
        .arg("--")
        .args(paths)
        .status()
        .expect("cannot run git diff-index");

    // Exit code 0 means nothing changed, exit code 1 means that a difference was detected.
    !status.success()
}

/// Looks up the most recent upstream commit that modified `target_paths`.
///
/// Yields `Ok(None)` when git does not report any matching commit.
fn get_latest_upstream_commit_that_modified_files(
    git_dir: &Path,
    git_config: &GitConfig<'_>,
    target_paths: &[&str],
) -> Result<Option<String>, String> {
    // A single `git rev-list --first-parent HEAD --author=<merge-bot> -- <paths>` would, in
    // theory, be enough to find the latest upstream commit that modified `<paths>`.
    // It breaks down in subtree sync branches, though: their merge commits have the subtree
    // history as first parent and the rustc history as second parent, so `--first-parent`
    // only ever walks the subtree history and never sees a single rustc commit.
    // The lookup is therefore split in two steps: first find the most recent upstream commit
    // by *date* (which should work in a subtree sync branch as well), then search for the
    // modified paths starting from that commit.
    //
    // See https://github.com/rust-lang/rust/pull/138591#discussion_r2037081858 for more details.
    let start = match get_closest_upstream_commit(Some(git_dir), git_config, CiEnv::None)? {
        Some(sha) => sha,
        None => String::from("HEAD"),
    };

    let mut rev_list = Command::new("git");
    rev_list
        .current_dir(git_dir)
        .arg("rev-list")
        .arg("--first-parent")
        .arg("-n1")
        .arg(&start)
        .arg("--author")
        .arg(git_config.git_merge_commit_email);
    if !target_paths.is_empty() {
        rev_list.arg("--").args(target_paths);
    }

    let stdout = output_result(&mut rev_list)?;
    let sha = stdout.trim();
    Ok(if sha.is_empty() { None } else { Some(sha.to_owned()) })
}

/// Finds the chronologically most recent commit in the local history that should also exist
/// upstream. Upstream commits are recognized by the e-mail of their author.
///
/// On CI the first parent of `HEAD` is returned directly.
///
/// Yields `Ok(None)` when git does not report any matching commit.
pub fn get_closest_upstream_commit(
    git_dir: Option<&Path>,
    config: &GitConfig<'_>,
    env: CiEnv,
) -> Result<Option<String>, String> {
    match env {
        CiEnv::GitHubActions => {
            // The tip on CI should always be a non-upstream merge commit whose first parent
            // is the most recently merged upstream commit, so that parent is the answer.
            return resolve_commit_sha(git_dir, "HEAD^1").map(Some);
        }
        CiEnv::None => {}
    }

    let author_filter = format!("--author={}", config.git_merge_commit_email);

    let mut rev_list = Command::new("git");
    if let Some(dir) = git_dir {
        rev_list.current_dir(dir);
    }

    // `--first-parent` is deliberately not used: outside CI we may sit on a subtree merge that
    // has the main rustc history as its *second* parent. `--first-parent` would then descend
    // into the subtree history, which may contain old bors commits that are irrelevant here.
    // With `--author-date-order` git walks all parents and returns the chronologically most
    // recent bors commit.
    // This assumes that none of our subtrees use bors anymore, and that all of their old bors
    // commits are way older than recent rustc bors commits!
    rev_list
        .arg("rev-list")
        .arg("--author-date-order")
        .arg(&author_filter)
        .arg("-n1")
        .arg("HEAD");

    let sha = output_result(&mut rev_list)?.trim().to_owned();
    Ok(Some(sha).filter(|s| !s.is_empty()))
}

/// Runs `git rev-parse` on `commit_ref` and returns its trimmed output, i.e. the commit SHA.
///
/// When `git_dir` is given, git is run inside that directory.
fn resolve_commit_sha(git_dir: Option<&Path>, commit_ref: &str) -> Result<String, String> {
    let mut rev_parse = Command::new("git");
    if let Some(dir) = git_dir {
        rev_parse.current_dir(dir);
    }
    rev_parse.arg("rev-parse").arg(commit_ref);

    output_result(&mut rev_parse).map(|sha| sha.trim().to_owned())
}

/// Lists the files modified in the current branch compared to the master branch.
/// Committed changes, uncommitted changes and changes that are not even staged all count.
///
/// Removed files are never part of the result.
/// `extensions` restricts the result to files with one of the given extensions; when it is
/// empty, every file is returned.
///
/// # Errors
///
/// Fails when no upstream commit can be found or when a git invocation fails.
pub fn get_git_modified_files(
    config: &GitConfig<'_>,
    git_dir: Option<&Path>,
    extensions: &[&str],
) -> Result<Vec<String>, String> {
    let merge_base = match get_closest_upstream_commit(git_dir, config, CiEnv::None)? {
        Some(sha) => sha,
        None => return Err("No upstream commit was found".to_string()),
    };

    let mut diff_index = Command::new("git");
    if let Some(dir) = git_dir {
        diff_index.current_dir(dir);
    }
    diff_index.arg("diff-index").arg("--name-status").arg(merge_base.trim());

    let listing = output_result(&mut diff_index)?;
    let mut files = Vec::new();
    for line in listing.lines() {
        // Each line has the shape `<status><whitespace><name>`.
        let (status, name) = line.trim().split_once(char::is_whitespace).unwrap();
        if status == "D" {
            continue;
        }
        let wanted = match Path::new(name).extension() {
            // A path without an extension only passes when no filter was requested.
            None => extensions.is_empty(),
            // A path with an extension passes without a filter, or when the filter lists it.
            Some(ext) => extensions.is_empty() || extensions.contains(&ext.to_str().unwrap()),
        };
        if wanted {
            files.push(name.to_owned());
        }
    }
    Ok(files)
}

/// Lists the files that have not been added to git yet, as reported by
/// `git ls-files --others --exclude-standard`.
///
/// When `git_dir` is given, git is run inside that directory. On success the result is
/// always `Some`, with one trimmed entry per output line.
pub fn get_git_untracked_files(git_dir: Option<&Path>) -> Result<Option<Vec<String>>, String> {
    let mut ls_files = Command::new("git");
    if let Some(dir) = git_dir {
        ls_files.current_dir(dir);
    }
    ls_files.args(["ls-files", "--others", "--exclude-standard"]);

    let listing = output_result(&mut ls_files)?;
    let untracked: Vec<String> = listing.lines().map(|line| line.trim().to_owned()).collect();
    Ok(Some(untracked))
}