about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/ci.yml19
-rw-r--r--src/ci/citool/src/analysis.rs362
-rw-r--r--src/ci/citool/src/main.rs83
-rw-r--r--src/ci/citool/src/merge_report.rs318
-rw-r--r--src/ci/citool/src/metrics.rs202
-rw-r--r--src/ci/citool/src/utils.rs20
6 files changed, 532 insertions, 472 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 96c0955e871..25397006ee2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -239,16 +239,31 @@ jobs:
         if: github.event_name == 'push' || env.DEPLOY == '1' || env.DEPLOY_ALT == '1'
 
       - name: postprocess metrics into the summary
+        # This step is not critical, and if some I/O problem happens, we don't want
+        # to cancel the build.
+        continue-on-error: true
         run: |
           if [ -f build/metrics.json ]; then
-            ./build/citool/debug/citool postprocess-metrics build/metrics.json ${GITHUB_STEP_SUMMARY}
+            METRICS=build/metrics.json
           elif [ -f obj/build/metrics.json ]; then
-            ./build/citool/debug/citool postprocess-metrics obj/build/metrics.json ${GITHUB_STEP_SUMMARY}
+            METRICS=obj/build/metrics.json
           else
             echo "No metrics.json found"
+            exit 0
           fi
 
+          # Get closest bors merge commit
+          PARENT_COMMIT=`git rev-list --author='bors <bors@rust-lang.org>' -n1 --first-parent HEAD^1`
+
+          ./build/citool/debug/citool postprocess-metrics \
+              --job-name ${CI_JOB_NAME} \
+              --parent ${PARENT_COMMIT} \
+              ${METRICS} >> ${GITHUB_STEP_SUMMARY}
+
       - name: upload job metrics to DataDog
+        # This step is not critical, and if some I/O problem happens, we don't want
+        # to cancel the build.
+        continue-on-error: true
         if: needs.calculate_matrix.outputs.run_type != 'pr'
         env:
           DATADOG_API_KEY: ${{ secrets.DATADOG_API_KEY }}
diff --git a/src/ci/citool/src/analysis.rs b/src/ci/citool/src/analysis.rs
new file mode 100644
index 00000000000..2088ce29620
--- /dev/null
+++ b/src/ci/citool/src/analysis.rs
@@ -0,0 +1,362 @@
+use std::collections::{BTreeMap, HashMap, HashSet};
+
+use build_helper::metrics::{
+    BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, format_build_steps,
+};
+
+use crate::metrics;
+use crate::metrics::{JobMetrics, JobName, get_test_suites};
+use crate::utils::{output_details, pluralize};
+
+pub fn output_bootstrap_stats(metrics: &JsonRoot) {
+    if !metrics.invocations.is_empty() {
+        println!("# Bootstrap steps");
+        record_bootstrap_step_durations(&metrics);
+        record_test_suites(&metrics);
+    }
+}
+
+fn record_bootstrap_step_durations(metrics: &JsonRoot) {
+    for invocation in &metrics.invocations {
+        let step = BuildStep::from_invocation(invocation);
+        let table = format_build_steps(&step);
+        eprintln!("Step `{}`\n{table}\n", invocation.cmdline);
+        output_details(&invocation.cmdline, || {
+            println!("<pre><code>{table}</code></pre>");
+        });
+    }
+    eprintln!("Recorded {} bootstrap invocation(s)", metrics.invocations.len());
+}
+
+fn record_test_suites(metrics: &JsonRoot) {
+    let suites = metrics::get_test_suites(&metrics);
+
+    if !suites.is_empty() {
+        let aggregated = aggregate_test_suites(&suites);
+        let table = render_table(aggregated);
+        println!("\n# Test results\n");
+        println!("{table}");
+    } else {
+        eprintln!("No test suites found in metrics");
+    }
+}
+
+fn render_table(suites: BTreeMap<String, TestSuiteRecord>) -> String {
+    use std::fmt::Write;
+
+    let mut table = "| Test suite | Passed ✅ | Ignored 🚫 | Failed  ❌ |\n".to_string();
+    writeln!(table, "|:------|------:|------:|------:|").unwrap();
+
+    fn compute_pct(value: f64, total: f64) -> f64 {
+        if total == 0.0 { 0.0 } else { value / total }
+    }
+
+    fn write_row(
+        buffer: &mut String,
+        name: &str,
+        record: &TestSuiteRecord,
+        surround: &str,
+    ) -> std::fmt::Result {
+        let TestSuiteRecord { passed, ignored, failed } = record;
+        let total = (record.passed + record.ignored + record.failed) as f64;
+        let passed_pct = compute_pct(*passed as f64, total) * 100.0;
+        let ignored_pct = compute_pct(*ignored as f64, total) * 100.0;
+        let failed_pct = compute_pct(*failed as f64, total) * 100.0;
+
+        write!(buffer, "| {surround}{name}{surround} |")?;
+        write!(buffer, " {surround}{passed} ({passed_pct:.0}%){surround} |")?;
+        write!(buffer, " {surround}{ignored} ({ignored_pct:.0}%){surround} |")?;
+        writeln!(buffer, " {surround}{failed} ({failed_pct:.0}%){surround} |")?;
+
+        Ok(())
+    }
+
+    let mut total = TestSuiteRecord::default();
+    for (name, record) in suites {
+        write_row(&mut table, &name, &record, "").unwrap();
+        total.passed += record.passed;
+        total.ignored += record.ignored;
+        total.failed += record.failed;
+    }
+    write_row(&mut table, "Total", &total, "**").unwrap();
+    table
+}
+
+/// Computes a post-merge CI analysis report of test differences
+/// between the `parent` and `current` commits.
+pub fn output_test_diffs(job_metrics: HashMap<JobName, JobMetrics>) {
+    let aggregated_test_diffs = aggregate_test_diffs(&job_metrics);
+    report_test_diffs(aggregated_test_diffs);
+}
+
+#[derive(Default)]
+struct TestSuiteRecord {
+    passed: u64,
+    ignored: u64,
+    failed: u64,
+}
+
+fn test_metadata_name(metadata: &TestSuiteMetadata) -> String {
+    match metadata {
+        TestSuiteMetadata::CargoPackage { crates, stage, .. } => {
+            format!("{} (stage {stage})", crates.join(", "))
+        }
+        TestSuiteMetadata::Compiletest { suite, stage, .. } => {
+            format!("{suite} (stage {stage})")
+        }
+    }
+}
+
+fn aggregate_test_suites(suites: &[&TestSuite]) -> BTreeMap<String, TestSuiteRecord> {
+    let mut records: BTreeMap<String, TestSuiteRecord> = BTreeMap::new();
+    for suite in suites {
+        let name = test_metadata_name(&suite.metadata);
+        let record = records.entry(name).or_default();
+        for test in &suite.tests {
+            match test.outcome {
+                TestOutcome::Passed => {
+                    record.passed += 1;
+                }
+                TestOutcome::Failed => {
+                    record.failed += 1;
+                }
+                TestOutcome::Ignored { .. } => {
+                    record.ignored += 1;
+                }
+            }
+        }
+    }
+    records
+}
+
+/// Represents a difference in the outcome of tests between a base and a current commit.
+/// Maps test diffs to jobs that contained them.
+#[derive(Debug)]
+struct AggregatedTestDiffs {
+    diffs: HashMap<TestDiff, Vec<JobName>>,
+}
+
+fn aggregate_test_diffs(jobs: &HashMap<JobName, JobMetrics>) -> AggregatedTestDiffs {
+    let mut diffs: HashMap<TestDiff, Vec<JobName>> = HashMap::new();
+
+    // Aggregate test suites
+    for (name, metrics) in jobs {
+        if let Some(parent) = &metrics.parent {
+            let tests_parent = aggregate_tests(parent);
+            let tests_current = aggregate_tests(&metrics.current);
+            for diff in calculate_test_diffs(tests_parent, tests_current) {
+                diffs.entry(diff).or_default().push(name.to_string());
+            }
+        }
+    }
+
+    AggregatedTestDiffs { diffs }
+}
+
+#[derive(Eq, PartialEq, Hash, Debug)]
+enum TestOutcomeDiff {
+    ChangeOutcome { before: TestOutcome, after: TestOutcome },
+    Missing { before: TestOutcome },
+    Added(TestOutcome),
+}
+
+#[derive(Eq, PartialEq, Hash, Debug)]
+struct TestDiff {
+    test: Test,
+    diff: TestOutcomeDiff,
+}
+
+fn calculate_test_diffs(parent: TestSuiteData, current: TestSuiteData) -> HashSet<TestDiff> {
+    let mut diffs = HashSet::new();
+    for (test, outcome) in &current.tests {
+        match parent.tests.get(test) {
+            Some(before) => {
+                if before != outcome {
+                    diffs.insert(TestDiff {
+                        test: test.clone(),
+                        diff: TestOutcomeDiff::ChangeOutcome {
+                            before: before.clone(),
+                            after: outcome.clone(),
+                        },
+                    });
+                }
+            }
+            None => {
+                diffs.insert(TestDiff {
+                    test: test.clone(),
+                    diff: TestOutcomeDiff::Added(outcome.clone()),
+                });
+            }
+        }
+    }
+    for (test, outcome) in &parent.tests {
+        if !current.tests.contains_key(test) {
+            diffs.insert(TestDiff {
+                test: test.clone(),
+                diff: TestOutcomeDiff::Missing { before: outcome.clone() },
+            });
+        }
+    }
+
+    diffs
+}
+
+/// Aggregates test suite executions from all bootstrap invocations in a given CI job.
+#[derive(Default)]
+struct TestSuiteData {
+    tests: HashMap<Test, TestOutcome>,
+}
+
+#[derive(Hash, PartialEq, Eq, Debug, Clone)]
+struct Test {
+    name: String,
+    is_doctest: bool,
+}
+
+/// Extracts all tests from the passed metrics and maps them to their outcomes.
+fn aggregate_tests(metrics: &JsonRoot) -> TestSuiteData {
+    let mut tests = HashMap::new();
+    let test_suites = get_test_suites(&metrics);
+    for suite in test_suites {
+        for test in &suite.tests {
+            // Poor man's detection of doctests based on the "(line XYZ)" suffix
+            let is_doctest = matches!(suite.metadata, TestSuiteMetadata::CargoPackage { .. })
+                && test.name.contains("(line");
+            let test_entry = Test { name: generate_test_name(&test.name, &suite), is_doctest };
+            tests.insert(test_entry, test.outcome.clone());
+        }
+    }
+    TestSuiteData { tests }
+}
+
+/// Normalizes Windows-style path delimiters to Unix-style paths
+/// and adds suite metadata to the test name.
+fn generate_test_name(name: &str, suite: &TestSuite) -> String {
+    let name = name.replace('\\', "/");
+    let stage = match suite.metadata {
+        TestSuiteMetadata::CargoPackage { stage, .. } => stage,
+        TestSuiteMetadata::Compiletest { stage, .. } => stage,
+    };
+
+    format!("{name} (stage {stage})")
+}
+
+/// Prints test changes in Markdown format to stdout.
+fn report_test_diffs(diff: AggregatedTestDiffs) {
+    println!("# Test differences");
+    if diff.diffs.is_empty() {
+        println!("No test diffs found");
+        return;
+    }
+
+    fn format_outcome(outcome: &TestOutcome) -> String {
+        match outcome {
+            TestOutcome::Passed => "pass".to_string(),
+            TestOutcome::Failed => "fail".to_string(),
+            TestOutcome::Ignored { ignore_reason } => {
+                let reason = match ignore_reason {
+                    Some(reason) => format!(" ({reason})"),
+                    None => String::new(),
+                };
+                format!("ignore{reason}")
+            }
+        }
+    }
+
+    fn format_diff(diff: &TestOutcomeDiff) -> String {
+        match diff {
+            TestOutcomeDiff::ChangeOutcome { before, after } => {
+                format!("{} -> {}", format_outcome(before), format_outcome(after))
+            }
+            TestOutcomeDiff::Missing { before } => {
+                format!("{} -> [missing]", format_outcome(before))
+            }
+            TestOutcomeDiff::Added(outcome) => {
+                format!("[missing] -> {}", format_outcome(outcome))
+            }
+        }
+    }
+
+    fn format_job_group(group: u64) -> String {
+        format!("**J{group}**")
+    }
+
+    // It would be quite noisy to repeat the jobs that contained the test changes after/next to
+    // every test diff. At the same time, grouping the test diffs by
+    // [unique set of jobs that contained them] also doesn't work well, because the test diffs
+    // would have to be duplicated several times.
+    // Instead, we create a set of unique job groups, and then print a job group after each test.
+    // We then print the job groups at the end, as a sort of index.
+    let mut grouped_diffs: Vec<(&TestDiff, u64)> = vec![];
+    let mut job_list_to_group: HashMap<&[JobName], u64> = HashMap::new();
+    let mut job_index: Vec<&[JobName]> = vec![];
+
+    let original_diff_count = diff.diffs.len();
+    let diffs = diff
+        .diffs
+        .into_iter()
+        .filter(|(diff, _)| !diff.test.is_doctest)
+        .map(|(diff, mut jobs)| {
+            jobs.sort();
+            (diff, jobs)
+        })
+        .collect::<Vec<_>>();
+    let doctest_count = original_diff_count.saturating_sub(diffs.len());
+
+    let max_diff_count = 100;
+    for (diff, jobs) in diffs.iter().take(max_diff_count) {
+        let jobs = &*jobs;
+        let job_group = match job_list_to_group.get(jobs.as_slice()) {
+            Some(id) => *id,
+            None => {
+                let id = job_index.len() as u64;
+                job_index.push(jobs);
+                job_list_to_group.insert(jobs, id);
+                id
+            }
+        };
+        grouped_diffs.push((diff, job_group));
+    }
+
+    // Sort diffs by job group and test name
+    grouped_diffs.sort_by(|(d1, g1), (d2, g2)| g1.cmp(&g2).then(d1.test.name.cmp(&d2.test.name)));
+
+    output_details(
+        &format!("Show {} test {}\n", original_diff_count, pluralize("diff", original_diff_count)),
+        || {
+            for (diff, job_group) in grouped_diffs {
+                println!(
+                    "- `{}`: {} ({})",
+                    diff.test.name,
+                    format_diff(&diff.diff),
+                    format_job_group(job_group)
+                );
+            }
+
+            let extra_diffs = diffs.len().saturating_sub(max_diff_count);
+            if extra_diffs > 0 {
+                println!(
+                    "\n(and {extra_diffs} additional {})",
+                    pluralize("test diff", extra_diffs)
+                );
+            }
+
+            if doctest_count > 0 {
+                println!(
+                    "\nAdditionally, {doctest_count} doctest {} were found. These are ignored, as they are noisy.",
+                    pluralize("diff", doctest_count)
+                );
+            }
+
+            // Now print the job group index
+            println!("\n**Job group index**\n");
+            for (group, jobs) in job_index.into_iter().enumerate() {
+                println!(
+                    "- {}: {}",
+                    format_job_group(group as u64),
+                    jobs.iter().map(|j| format!("`{j}`")).collect::<Vec<_>>().join(", ")
+                );
+            }
+        },
+    );
+}
diff --git a/src/ci/citool/src/main.rs b/src/ci/citool/src/main.rs
index cd690ebeb06..9e4b558d77a 100644
--- a/src/ci/citool/src/main.rs
+++ b/src/ci/citool/src/main.rs
@@ -1,25 +1,26 @@
+mod analysis;
 mod cpu_usage;
 mod datadog;
 mod jobs;
-mod merge_report;
 mod metrics;
 mod utils;
 
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashMap};
 use std::path::{Path, PathBuf};
 use std::process::Command;
 
+use analysis::output_bootstrap_stats;
 use anyhow::Context;
 use clap::Parser;
 use jobs::JobDatabase;
 use serde_yaml::Value;
 
+use crate::analysis::output_test_diffs;
 use crate::cpu_usage::load_cpu_usage;
 use crate::datadog::upload_datadog_metric;
 use crate::jobs::RunType;
-use crate::merge_report::post_merge_report;
-use crate::metrics::postprocess_metrics;
-use crate::utils::load_env_var;
+use crate::metrics::{JobMetrics, download_auto_job_metrics, download_job_metrics, load_metrics};
+use crate::utils::{load_env_var, output_details};
 
 const CI_DIRECTORY: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/..");
 const DOCKER_DIRECTORY: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../docker");
@@ -137,6 +138,56 @@ fn upload_ci_metrics(cpu_usage_csv: &Path) -> anyhow::Result<()> {
     Ok(())
 }
 
+fn postprocess_metrics(
+    metrics_path: PathBuf,
+    parent: Option<String>,
+    job_name: Option<String>,
+) -> anyhow::Result<()> {
+    let metrics = load_metrics(&metrics_path)?;
+    output_bootstrap_stats(&metrics);
+
+    let (Some(parent), Some(job_name)) = (parent, job_name) else {
+        return Ok(());
+    };
+
+    // This command is executed also on PR builds, which might not have parent metrics
+    // available, because some PR jobs don't run on auto builds, and PR jobs do not upload metrics
+    // due to missing permissions.
+    // To avoid having to detect if this is a PR job, and to avoid having failed steps in PR jobs,
+    // we simply print an error if the parent metrics were not found, but otherwise exit
+    // successfully.
+    match download_job_metrics(&job_name, &parent).context("cannot download parent metrics") {
+        Ok(parent_metrics) => {
+            let job_metrics = HashMap::from([(
+                job_name,
+                JobMetrics { parent: Some(parent_metrics), current: metrics },
+            )]);
+            output_test_diffs(job_metrics);
+        }
+        Err(error) => {
+            eprintln!("Metrics for job `{job_name}` and commit `{parent}` not found: {error:?}");
+        }
+    }
+
+    Ok(())
+}
+
+fn post_merge_report(db: JobDatabase, current: String, parent: String) -> anyhow::Result<()> {
+    let metrics = download_auto_job_metrics(&db, &parent, &current)?;
+
+    output_details("What is this?", || {
+        println!(
+            r#"This is an experimental post-merge analysis report that shows differences in
+test outcomes between the merged PR and its parent PR."#
+        );
+    });
+
+    println!("\nComparing {parent} (parent) -> {current} (this PR)\n");
+    output_test_diffs(metrics);
+
+    Ok(())
+}
+
 #[derive(clap::Parser)]
 enum Args {
     /// Calculate a list of jobs that should be executed on CI.
@@ -154,13 +205,19 @@ enum Args {
         #[clap(long = "type", default_value = "auto")]
         job_type: JobType,
     },
-    /// Postprocess the metrics.json file generated by bootstrap.
+    /// Postprocess the metrics.json file generated by bootstrap and output
+    /// various statistics.
+    /// If `--parent` and `--job-name` are provided, also display a diff
+    /// against previous metrics that are downloaded from CI.
     PostprocessMetrics {
         /// Path to the metrics.json file
         metrics_path: PathBuf,
-        /// Path to a file where the postprocessed metrics summary will be stored.
-        /// Usually, this will be GITHUB_STEP_SUMMARY on CI.
-        summary_path: PathBuf,
+        /// A parent SHA against which to compare.
+        #[clap(long, requires("job_name"))]
+        parent: Option<String>,
+        /// The name of the current job.
+        #[clap(long, requires("parent"))]
+        job_name: Option<String>,
     },
     /// Upload CI metrics to Datadog.
     UploadBuildMetrics {
@@ -211,11 +268,11 @@ fn main() -> anyhow::Result<()> {
         Args::UploadBuildMetrics { cpu_usage_csv } => {
             upload_ci_metrics(&cpu_usage_csv)?;
         }
-        Args::PostprocessMetrics { metrics_path, summary_path } => {
-            postprocess_metrics(&metrics_path, &summary_path)?;
+        Args::PostprocessMetrics { metrics_path, parent, job_name } => {
+            postprocess_metrics(metrics_path, parent, job_name)?;
         }
-        Args::PostMergeReport { current: commit, parent } => {
-            post_merge_report(load_db(default_jobs_file)?, parent, commit)?;
+        Args::PostMergeReport { current, parent } => {
+            post_merge_report(load_db(default_jobs_file)?, current, parent)?;
         }
     }
 
diff --git a/src/ci/citool/src/merge_report.rs b/src/ci/citool/src/merge_report.rs
deleted file mode 100644
index 62daa2e6853..00000000000
--- a/src/ci/citool/src/merge_report.rs
+++ /dev/null
@@ -1,318 +0,0 @@
-use std::collections::{HashMap, HashSet};
-use std::path::PathBuf;
-
-use anyhow::Context;
-use build_helper::metrics::{JsonRoot, TestOutcome, TestSuiteMetadata};
-
-use crate::jobs::JobDatabase;
-use crate::metrics::get_test_suites;
-
-type Sha = String;
-type JobName = String;
-
-/// Computes a post merge CI analysis report between the `parent` and `current` commits.
-pub fn post_merge_report(job_db: JobDatabase, parent: Sha, current: Sha) -> anyhow::Result<()> {
-    let jobs = download_all_metrics(&job_db, &parent, &current)?;
-    let aggregated_test_diffs = aggregate_test_diffs(&jobs)?;
-
-    println!("Comparing {parent} (base) -> {current} (this PR)\n");
-    report_test_diffs(aggregated_test_diffs);
-
-    Ok(())
-}
-
-struct JobMetrics {
-    parent: Option<JsonRoot>,
-    current: JsonRoot,
-}
-
-/// Download before/after metrics for all auto jobs in the job database.
-fn download_all_metrics(
-    job_db: &JobDatabase,
-    parent: &str,
-    current: &str,
-) -> anyhow::Result<HashMap<JobName, JobMetrics>> {
-    let mut jobs = HashMap::default();
-
-    for job in &job_db.auto_jobs {
-        eprintln!("Downloading metrics of job {}", job.name);
-        let metrics_parent = match download_job_metrics(&job.name, parent) {
-            Ok(metrics) => Some(metrics),
-            Err(error) => {
-                eprintln!(
-                    r#"Did not find metrics for job `{}` at `{}`: {error:?}.
-Maybe it was newly added?"#,
-                    job.name, parent
-                );
-                None
-            }
-        };
-        let metrics_current = download_job_metrics(&job.name, current)?;
-        jobs.insert(
-            job.name.clone(),
-            JobMetrics { parent: metrics_parent, current: metrics_current },
-        );
-    }
-    Ok(jobs)
-}
-
-/// Downloads job metrics of the given job for the given commit.
-/// Caches the result on the local disk.
-fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
-    let cache_path = PathBuf::from(".citool-cache").join(sha).join(job_name).join("metrics.json");
-    if let Some(cache_entry) =
-        std::fs::read_to_string(&cache_path).ok().and_then(|data| serde_json::from_str(&data).ok())
-    {
-        return Ok(cache_entry);
-    }
-
-    let url = get_metrics_url(job_name, sha);
-    let mut response = ureq::get(&url).call()?;
-    if !response.status().is_success() {
-        return Err(anyhow::anyhow!(
-            "Cannot fetch metrics from {url}: {}\n{}",
-            response.status(),
-            response.body_mut().read_to_string()?
-        ));
-    }
-    let data: JsonRoot = response
-        .body_mut()
-        .read_json()
-        .with_context(|| anyhow::anyhow!("cannot deserialize metrics from {url}"))?;
-
-    // Ignore errors if cache cannot be created
-    if std::fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
-        if let Ok(serialized) = serde_json::to_string(&data) {
-            let _ = std::fs::write(&cache_path, &serialized);
-        }
-    }
-    Ok(data)
-}
-
-fn get_metrics_url(job_name: &str, sha: &str) -> String {
-    let suffix = if job_name.ends_with("-alt") { "-alt" } else { "" };
-    format!("https://ci-artifacts.rust-lang.org/rustc-builds{suffix}/{sha}/metrics-{job_name}.json")
-}
-
-/// Represents a difference in the outcome of tests between a base and a current commit.
-/// Maps test diffs to jobs that contained them.
-#[derive(Debug)]
-struct AggregatedTestDiffs {
-    diffs: HashMap<TestDiff, Vec<JobName>>,
-}
-
-fn aggregate_test_diffs(
-    jobs: &HashMap<JobName, JobMetrics>,
-) -> anyhow::Result<AggregatedTestDiffs> {
-    let mut diffs: HashMap<TestDiff, Vec<JobName>> = HashMap::new();
-
-    // Aggregate test suites
-    for (name, metrics) in jobs {
-        if let Some(parent) = &metrics.parent {
-            let tests_parent = aggregate_tests(parent);
-            let tests_current = aggregate_tests(&metrics.current);
-            for diff in calculate_test_diffs(tests_parent, tests_current) {
-                diffs.entry(diff).or_default().push(name.to_string());
-            }
-        }
-    }
-
-    Ok(AggregatedTestDiffs { diffs })
-}
-
-#[derive(Eq, PartialEq, Hash, Debug)]
-enum TestOutcomeDiff {
-    ChangeOutcome { before: TestOutcome, after: TestOutcome },
-    Missing { before: TestOutcome },
-    Added(TestOutcome),
-}
-
-#[derive(Eq, PartialEq, Hash, Debug)]
-struct TestDiff {
-    test: Test,
-    diff: TestOutcomeDiff,
-}
-
-fn calculate_test_diffs(parent: TestSuiteData, current: TestSuiteData) -> HashSet<TestDiff> {
-    let mut diffs = HashSet::new();
-    for (test, outcome) in &current.tests {
-        match parent.tests.get(test) {
-            Some(before) => {
-                if before != outcome {
-                    diffs.insert(TestDiff {
-                        test: test.clone(),
-                        diff: TestOutcomeDiff::ChangeOutcome {
-                            before: before.clone(),
-                            after: outcome.clone(),
-                        },
-                    });
-                }
-            }
-            None => {
-                diffs.insert(TestDiff {
-                    test: test.clone(),
-                    diff: TestOutcomeDiff::Added(outcome.clone()),
-                });
-            }
-        }
-    }
-    for (test, outcome) in &parent.tests {
-        if !current.tests.contains_key(test) {
-            diffs.insert(TestDiff {
-                test: test.clone(),
-                diff: TestOutcomeDiff::Missing { before: outcome.clone() },
-            });
-        }
-    }
-
-    diffs
-}
-
-/// Aggregates test suite executions from all bootstrap invocations in a given CI job.
-#[derive(Default)]
-struct TestSuiteData {
-    tests: HashMap<Test, TestOutcome>,
-}
-
-#[derive(Hash, PartialEq, Eq, Debug, Clone)]
-struct Test {
-    name: String,
-    is_doctest: bool,
-}
-
-/// Extracts all tests from the passed metrics and map them to their outcomes.
-fn aggregate_tests(metrics: &JsonRoot) -> TestSuiteData {
-    let mut tests = HashMap::new();
-    let test_suites = get_test_suites(&metrics);
-    for suite in test_suites {
-        for test in &suite.tests {
-            // Poor man's detection of doctests based on the "(line XYZ)" suffix
-            let is_doctest = matches!(suite.metadata, TestSuiteMetadata::CargoPackage { .. })
-                && test.name.contains("(line");
-            let test_entry = Test { name: normalize_test_name(&test.name), is_doctest };
-            tests.insert(test_entry, test.outcome.clone());
-        }
-    }
-    TestSuiteData { tests }
-}
-
-/// Normalizes Windows-style path delimiters to Unix-style paths.
-fn normalize_test_name(name: &str) -> String {
-    name.replace('\\', "/")
-}
-
-/// Prints test changes in Markdown format to stdout.
-fn report_test_diffs(diff: AggregatedTestDiffs) {
-    println!("## Test differences");
-    if diff.diffs.is_empty() {
-        println!("No test diffs found");
-        return;
-    }
-
-    fn format_outcome(outcome: &TestOutcome) -> String {
-        match outcome {
-            TestOutcome::Passed => "pass".to_string(),
-            TestOutcome::Failed => "fail".to_string(),
-            TestOutcome::Ignored { ignore_reason } => {
-                let reason = match ignore_reason {
-                    Some(reason) => format!(" ({reason})"),
-                    None => String::new(),
-                };
-                format!("ignore{reason}")
-            }
-        }
-    }
-
-    fn format_diff(diff: &TestOutcomeDiff) -> String {
-        match diff {
-            TestOutcomeDiff::ChangeOutcome { before, after } => {
-                format!("{} -> {}", format_outcome(before), format_outcome(after))
-            }
-            TestOutcomeDiff::Missing { before } => {
-                format!("{} -> [missing]", format_outcome(before))
-            }
-            TestOutcomeDiff::Added(outcome) => {
-                format!("[missing] -> {}", format_outcome(outcome))
-            }
-        }
-    }
-
-    fn format_job_group(group: u64) -> String {
-        format!("**J{group}**")
-    }
-
-    // It would be quite noisy to repeat the jobs that contained the test changes after/next to
-    // every test diff. At the same time, grouping the test diffs by
-    // [unique set of jobs that contained them] also doesn't work well, because the test diffs
-    // would have to be duplicated several times.
-    // Instead, we create a set of unique job groups, and then print a job group after each test.
-    // We then print the job groups at the end, as a sort of index.
-    let mut grouped_diffs: Vec<(&TestDiff, u64)> = vec![];
-    let mut job_list_to_group: HashMap<&[JobName], u64> = HashMap::new();
-    let mut job_index: Vec<&[JobName]> = vec![];
-
-    let original_diff_count = diff.diffs.len();
-    let diffs = diff
-        .diffs
-        .into_iter()
-        .filter(|(diff, _)| !diff.test.is_doctest)
-        .map(|(diff, mut jobs)| {
-            jobs.sort();
-            (diff, jobs)
-        })
-        .collect::<Vec<_>>();
-    let doctest_count = original_diff_count.saturating_sub(diffs.len());
-
-    let max_diff_count = 100;
-    for (diff, jobs) in diffs.iter().take(max_diff_count) {
-        let jobs = &*jobs;
-        let job_group = match job_list_to_group.get(jobs.as_slice()) {
-            Some(id) => *id,
-            None => {
-                let id = job_index.len() as u64;
-                job_index.push(jobs);
-                job_list_to_group.insert(jobs, id);
-                id
-            }
-        };
-        grouped_diffs.push((diff, job_group));
-    }
-
-    // Sort diffs by job group and test name
-    grouped_diffs.sort_by(|(d1, g1), (d2, g2)| g1.cmp(&g2).then(d1.test.name.cmp(&d2.test.name)));
-
-    for (diff, job_group) in grouped_diffs {
-        println!(
-            "- `{}`: {} ({})",
-            diff.test.name,
-            format_diff(&diff.diff),
-            format_job_group(job_group)
-        );
-    }
-
-    let extra_diffs = diffs.len().saturating_sub(max_diff_count);
-    if extra_diffs > 0 {
-        println!("\n(and {extra_diffs} additional {})", pluralize("test diff", extra_diffs));
-    }
-
-    if doctest_count > 0 {
-        println!(
-            "\nAdditionally, {doctest_count} doctest {} were found. These are ignored, as they are noisy.",
-            pluralize("diff", doctest_count)
-        );
-    }
-
-    // Now print the job group index
-    println!("\n**Job group index**\n");
-    for (group, jobs) in job_index.into_iter().enumerate() {
-        println!(
-            "- {}: {}",
-            format_job_group(group as u64),
-            jobs.iter().map(|j| format!("`{j}`")).collect::<Vec<_>>().join(", ")
-        );
-    }
-}
-
-fn pluralize(text: &str, count: usize) -> String {
-    if count == 1 { text.to_string() } else { format!("{text}s") }
-}
diff --git a/src/ci/citool/src/metrics.rs b/src/ci/citool/src/metrics.rs
index 83b3d5ceed0..086aa5009f3 100644
--- a/src/ci/citool/src/metrics.rs
+++ b/src/ci/citool/src/metrics.rs
@@ -1,146 +1,12 @@
-use std::collections::BTreeMap;
-use std::fs::File;
-use std::io::Write;
+use std::collections::HashMap;
 use std::path::Path;
 
 use anyhow::Context;
-use build_helper::metrics::{
-    BuildStep, JsonNode, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, format_build_steps,
-};
+use build_helper::metrics::{JsonNode, JsonRoot, TestSuite};
 
-pub fn postprocess_metrics(metrics_path: &Path, summary_path: &Path) -> anyhow::Result<()> {
-    let metrics = load_metrics(metrics_path)?;
+use crate::jobs::JobDatabase;
 
-    let mut file = File::options()
-        .append(true)
-        .create(true)
-        .open(summary_path)
-        .with_context(|| format!("Cannot open summary file at {summary_path:?}"))?;
-
-    if !metrics.invocations.is_empty() {
-        writeln!(file, "# Bootstrap steps")?;
-        record_bootstrap_step_durations(&metrics, &mut file)?;
-        record_test_suites(&metrics, &mut file)?;
-    }
-
-    Ok(())
-}
-
-fn record_bootstrap_step_durations(metrics: &JsonRoot, file: &mut File) -> anyhow::Result<()> {
-    for invocation in &metrics.invocations {
-        let step = BuildStep::from_invocation(invocation);
-        let table = format_build_steps(&step);
-        eprintln!("Step `{}`\n{table}\n", invocation.cmdline);
-        writeln!(
-            file,
-            r"<details>
-<summary>{}</summary>
-<pre><code>{table}</code></pre>
-</details>
-",
-            invocation.cmdline
-        )?;
-    }
-    eprintln!("Recorded {} bootstrap invocation(s)", metrics.invocations.len());
-
-    Ok(())
-}
-
-fn record_test_suites(metrics: &JsonRoot, file: &mut File) -> anyhow::Result<()> {
-    let suites = get_test_suites(&metrics);
-
-    if !suites.is_empty() {
-        let aggregated = aggregate_test_suites(&suites);
-        let table = render_table(aggregated);
-        writeln!(file, "\n# Test results\n")?;
-        writeln!(file, "{table}")?;
-    } else {
-        eprintln!("No test suites found in metrics");
-    }
-
-    Ok(())
-}
-
-fn render_table(suites: BTreeMap<String, TestSuiteRecord>) -> String {
-    use std::fmt::Write;
-
-    let mut table = "| Test suite | Passed ✅ | Ignored 🚫 | Failed  ❌ |\n".to_string();
-    writeln!(table, "|:------|------:|------:|------:|").unwrap();
-
-    fn compute_pct(value: f64, total: f64) -> f64 {
-        if total == 0.0 { 0.0 } else { value / total }
-    }
-
-    fn write_row(
-        buffer: &mut String,
-        name: &str,
-        record: &TestSuiteRecord,
-        surround: &str,
-    ) -> std::fmt::Result {
-        let TestSuiteRecord { passed, ignored, failed } = record;
-        let total = (record.passed + record.ignored + record.failed) as f64;
-        let passed_pct = compute_pct(*passed as f64, total) * 100.0;
-        let ignored_pct = compute_pct(*ignored as f64, total) * 100.0;
-        let failed_pct = compute_pct(*failed as f64, total) * 100.0;
-
-        write!(buffer, "| {surround}{name}{surround} |")?;
-        write!(buffer, " {surround}{passed} ({passed_pct:.0}%){surround} |")?;
-        write!(buffer, " {surround}{ignored} ({ignored_pct:.0}%){surround} |")?;
-        writeln!(buffer, " {surround}{failed} ({failed_pct:.0}%){surround} |")?;
-
-        Ok(())
-    }
-
-    let mut total = TestSuiteRecord::default();
-    for (name, record) in suites {
-        write_row(&mut table, &name, &record, "").unwrap();
-        total.passed += record.passed;
-        total.ignored += record.ignored;
-        total.failed += record.failed;
-    }
-    write_row(&mut table, "Total", &total, "**").unwrap();
-    table
-}
-
-#[derive(Default)]
-struct TestSuiteRecord {
-    passed: u64,
-    ignored: u64,
-    failed: u64,
-}
-
-fn test_metadata_name(metadata: &TestSuiteMetadata) -> String {
-    match metadata {
-        TestSuiteMetadata::CargoPackage { crates, stage, .. } => {
-            format!("{} (stage {stage})", crates.join(", "))
-        }
-        TestSuiteMetadata::Compiletest { suite, stage, .. } => {
-            format!("{suite} (stage {stage})")
-        }
-    }
-}
-
-fn aggregate_test_suites(suites: &[&TestSuite]) -> BTreeMap<String, TestSuiteRecord> {
-    let mut records: BTreeMap<String, TestSuiteRecord> = BTreeMap::new();
-    for suite in suites {
-        let name = test_metadata_name(&suite.metadata);
-        let record = records.entry(name).or_default();
-        for test in &suite.tests {
-            match test.outcome {
-                TestOutcome::Passed => {
-                    record.passed += 1;
-                }
-                TestOutcome::Failed => {
-                    record.failed += 1;
-                }
-                TestOutcome::Ignored { .. } => {
-                    record.ignored += 1;
-                }
-            }
-        }
-    }
-    records
-}
+pub type JobName = String;
 
 pub fn get_test_suites(metrics: &JsonRoot) -> Vec<&TestSuite> {
     fn visit_test_suites<'a>(nodes: &'a [JsonNode], suites: &mut Vec<&'a TestSuite>) {
@@ -163,10 +29,68 @@ pub fn get_test_suites(metrics: &JsonRoot) -> Vec<&TestSuite> {
     suites
 }
 
-fn load_metrics(path: &Path) -> anyhow::Result<JsonRoot> {
+pub fn load_metrics(path: &Path) -> anyhow::Result<JsonRoot> {
     let metrics = std::fs::read_to_string(path)
         .with_context(|| format!("Cannot read JSON metrics from {path:?}"))?;
     let metrics: JsonRoot = serde_json::from_str(&metrics)
         .with_context(|| format!("Cannot deserialize JSON metrics from {path:?}"))?;
     Ok(metrics)
 }
+
+pub struct JobMetrics {
+    pub parent: Option<JsonRoot>,
+    pub current: JsonRoot,
+}
+
+/// Download before/after metrics for all auto jobs in the job database.
+/// `parent` and `current` should be commit SHAs.
+pub fn download_auto_job_metrics(
+    job_db: &JobDatabase,
+    parent: &str,
+    current: &str,
+) -> anyhow::Result<HashMap<JobName, JobMetrics>> {
+    let mut jobs = HashMap::default();
+
+    for job in &job_db.auto_jobs {
+        eprintln!("Downloading metrics of job {}", job.name);
+        let metrics_parent = match download_job_metrics(&job.name, parent) {
+            Ok(metrics) => Some(metrics),
+            Err(error) => {
+                eprintln!(
+                    r#"Did not find metrics for job `{}` at `{parent}`: {error:?}.
+Maybe it was newly added?"#,
+                    job.name
+                );
+                None
+            }
+        };
+        let metrics_current = download_job_metrics(&job.name, current)?;
+        jobs.insert(
+            job.name.clone(),
+            JobMetrics { parent: metrics_parent, current: metrics_current },
+        );
+    }
+    Ok(jobs)
+}
+
+pub fn download_job_metrics(job_name: &str, sha: &str) -> anyhow::Result<JsonRoot> {
+    let url = get_metrics_url(job_name, sha);
+    let mut response = ureq::get(&url).call()?;
+    if !response.status().is_success() {
+        return Err(anyhow::anyhow!(
+            "Cannot fetch metrics from {url}: {}\n{}",
+            response.status(),
+            response.body_mut().read_to_string()?
+        ));
+    }
+    let data: JsonRoot = response
+        .body_mut()
+        .read_json()
+        .with_context(|| anyhow::anyhow!("cannot deserialize metrics from {url}"))?;
+    Ok(data)
+}
+
+fn get_metrics_url(job_name: &str, sha: &str) -> String {
+    let suffix = if job_name.ends_with("-alt") { "-alt" } else { "" };
+    format!("https://ci-artifacts.rust-lang.org/rustc-builds{suffix}/{sha}/metrics-{job_name}.json")
+}
diff --git a/src/ci/citool/src/utils.rs b/src/ci/citool/src/utils.rs
index 9cc220987bd..b9b1bf4d455 100644
--- a/src/ci/citool/src/utils.rs
+++ b/src/ci/citool/src/utils.rs
@@ -9,3 +9,23 @@ pub fn load_env_var(name: &str) -> anyhow::Result<String> {
 pub fn read_to_string<P: AsRef<Path>>(path: P) -> anyhow::Result<String> {
     std::fs::read_to_string(&path).with_context(|| format!("Cannot read file {:?}", path.as_ref()))
 }
+
+pub fn pluralize(text: &str, count: usize) -> String {
+    if count == 1 { text.to_string() } else { format!("{text}s") }
+}
+
+/// Outputs an HTML <details> section with the provided summary.
+/// Output printed by `func` will be contained within the section.
+pub fn output_details<F>(summary: &str, func: F)
+where
+    F: FnOnce(),
+{
+    println!(
+        r"<details>
+<summary>{summary}</summary>
+
+"
+    );
+    func();
+    println!("</details>\n");
+}