Auto merge of #13139 - xFrednet:lintcheck-limit-summery-output, r=Alexendoo

Lintcheck: Rework and limit diff output for GH's CI ### Background While working on https://github.com/rust-lang/rust-clippy/pull/13136 I found an amazing limitation of GH's CI. The summary can at most have be 1MB of text. Here is the warning message: > $GITHUB_STEP_SUMMARY upload aborted, supports content up to a size of 1024k, got 46731k. For more information see: https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-markdown-summary [The PR](https://github.com/rust-lang/rust-clippy/pull/13136) produced a *casual* 61808 changes. Guess that's why those lints are not *warn-by-default* :P. ### Changes: This PR limits the lintcheck diff output in two ways. 1. The diff is limited to 200 messages per lint per section. Hidden messages are indicated by a message at the end of the section. 2. The output is first written to a file and only the first 1MB is written to ` >> $GITHUB_STEP_SUMMARY`. The entire file is also written to the normal CI log. This helps for cases where several lints change and the total size exceeds the 1MB limit. An example of these changes can be seen here: https://github.com/xFrednet/rust-clippy/actions/runs/10028799118?pr=4 --- changelog: none r? `@Alexendoo` Sorry for bombarding you with so many PR's lately :sweat_smile: Feel free to pass some of you reviews to me.
author: bors <bors@rust-lang.org> 2024-07-25 12:11:35 +0000
committer: bors <bors@rust-lang.org> 2024-07-25 12:11:35 +0000
commit: be8234098b731bcac17304333c62fba8a525da77 (patch)
tree: e97df1a78ffc17d1d069911f9dbc6731c0428766
parent: 5e6540f04979062a9bd433420674149909e0cd87 (diff)
parent: bdf3e585a3c04eb2f40ae3dbc8815b683cb55eae (diff)
download: rust-be8234098b731bcac17304333c62fba8a525da77.tar.gz
rust-be8234098b731bcac17304333c62fba8a525da77.zip
5 files changed, 167 insertions, 43 deletions
diff --git a/.github/workflows/lintcheck.yml b/.github/workflows/lintcheck.yml
index 46fd3089f44..6a5139b6dc0 100644
--- a/.github/workflows/lintcheck.yml
+++ b/.github/workflows/lintcheck.yml
@@ -115,4 +115,20 @@ jobs:
       uses: actions/download-artifact@v4
 
     - name: Diff results
-      run: ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> $GITHUB_STEP_SUMMARY
+      # GH's summery has a maximum size of 1024k:
+      # https://docs.github.com/actions/using-workflows/workflow-commands-for-github-actions#adding-a-markdown-summary
+      # That's why we first log to file and then to the summary and logs
+      run: |
+        ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json --truncate >> truncated_diff.md
+        head -c 1024000 truncated_diff.md >> $GITHUB_STEP_SUMMARY
+        cat truncated_diff.md
+        ./target/debug/lintcheck diff {base,head}/ci_crates_logs.json >> full_diff.md
+
+    - name: Upload full diff
+      uses: actions/upload-artifact@v4
+      with:
+        name: diff
+        if-no-files-found: ignore
+        path: |
+          full_diff.md
+          truncated_diff.md
diff --git a/lintcheck/Cargo.toml b/lintcheck/Cargo.toml
index 3c86dfe324f..350418eeeb8 100644
--- a/lintcheck/Cargo.toml
+++ b/lintcheck/Cargo.toml
@@ -16,7 +16,7 @@ clap = { version = "4.4", features = ["derive", "env"] }
 crossbeam-channel = "0.5.6"
 diff = "0.1.13"
 flate2 = "1.0"
-itertools = "0.12"
+itertools = "0.13"
 rayon = "1.5.1"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0.85"
diff --git a/lintcheck/src/config.rs b/lintcheck/src/config.rs
index b35a62eed44..6bec1753fc7 100644
--- a/lintcheck/src/config.rs
+++ b/lintcheck/src/config.rs
@@ -53,7 +53,13 @@ pub(crate) struct LintcheckConfig {
 #[derive(Subcommand, Clone, Debug)]
 pub(crate) enum Commands {
     /// Display a markdown diff between two lintcheck log files in JSON format
-    Diff { old: PathBuf, new: PathBuf },
+    Diff {
+        old: PathBuf,
+        new: PathBuf,
+        /// This will limit the number of warnings that will be printed for each lint
+        #[clap(long)]
+        truncate: bool,
+    },
     /// Create a lintcheck crates TOML file containing the top N popular crates
     Popular {
         /// Output TOML file name
diff --git a/lintcheck/src/json.rs b/lintcheck/src/json.rs
index 74f36f000bd..4211bce90b2 100644
--- a/lintcheck/src/json.rs
+++ b/lintcheck/src/json.rs
@@ -1,12 +1,16 @@
 use std::fs;
 use std::path::Path;
 
-use itertools::EitherOrBoth;
+use itertools::{EitherOrBoth, Itertools};
 use serde::{Deserialize, Serialize};
 
 use crate::ClippyWarning;
 
-#[derive(Deserialize, Serialize)]
+/// This is the total number. 300 warnings results in 100 messages per section.
+const DEFAULT_LIMIT_PER_LINT: usize = 300;
+const TRUNCATION_TOTAL_TARGET: usize = 1000;
+
+#[derive(Debug, Deserialize, Serialize)]
 struct LintJson {
     lint: String,
     krate: String,
@@ -18,7 +22,7 @@ struct LintJson {
 
 impl LintJson {
     fn key(&self) -> impl Ord + '_ {
-        (self.file_name.as_str(), self.byte_pos, self.lint.as_str())
+        (self.lint.as_str(), self.file_name.as_str(), self.byte_pos)
     }
 
     fn info_text(&self, action: &str) -> String {
@@ -52,14 +56,117 @@ fn load_warnings(path: &Path) -> Vec<LintJson> {
     serde_json::from_slice(&file).unwrap_or_else(|e| panic!("failed to deserialize {}: {e}", path.display()))
 }
 
-fn print_warnings(title: &str, warnings: &[LintJson]) {
+pub(crate) fn diff(old_path: &Path, new_path: &Path, truncate: bool) {
+    let old_warnings = load_warnings(old_path);
+    let new_warnings = load_warnings(new_path);
+
+    let mut lint_warnings = vec![];
+
+    for (name, changes) in &itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key()))
+        .chunk_by(|change| change.as_ref().into_left().lint.to_string())
+    {
+        let mut added = Vec::new();
+        let mut removed = Vec::new();
+        let mut changed = Vec::new();
+        for change in changes {
+            match change {
+                EitherOrBoth::Both(old, new) => {
+                    if old.rendered != new.rendered {
+                        changed.push((old, new));
+                    }
+                },
+                EitherOrBoth::Left(old) => removed.push(old),
+                EitherOrBoth::Right(new) => added.push(new),
+            }
+        }
+
+        if !added.is_empty() || !removed.is_empty() || !changed.is_empty() {
+            lint_warnings.push(LintWarnings {
+                name,
+                added,
+                removed,
+                changed,
+            });
+        }
+    }
+
+    print_summary_table(&lint_warnings);
+    println!();
+
+    if lint_warnings.is_empty() {
+        return;
+    }
+
+    let truncate_after = if truncate {
+        // Max 15 ensures that we at least have five messages per lint
+        DEFAULT_LIMIT_PER_LINT
+            .min(TRUNCATION_TOTAL_TARGET / lint_warnings.len())
+            .max(15)
+    } else {
+        // No lint should ever each this number of lint emissions, so this is equivialent to
+        // No truncation
+        usize::MAX
+    };
+
+    for lint in lint_warnings {
+        print_lint_warnings(&lint, truncate_after);
+    }
+}
+
+#[derive(Debug)]
+struct LintWarnings {
+    name: String,
+    added: Vec<LintJson>,
+    removed: Vec<LintJson>,
+    changed: Vec<(LintJson, LintJson)>,
+}
+
+fn print_lint_warnings(lint: &LintWarnings, truncate_after: usize) {
+    let name = &lint.name;
+    let html_id = to_html_id(name);
+
+    // The additional anchor is added for non GH viewers that don't prefix ID's
+    println!(r#"## `{name}` <a id="user-content-{html_id}"></a>"#);
+    println!();
+
+    print!(
+        r##"{}, {}, {}"##,
+        count_string(name, "added", lint.added.len()),
+        count_string(name, "removed", lint.removed.len()),
+        count_string(name, "changed", lint.changed.len()),
+    );
+    println!();
+
+    print_warnings("Added", &lint.added, truncate_after / 3);
+    print_warnings("Removed", &lint.removed, truncate_after / 3);
+    print_changed_diff(&lint.changed, truncate_after / 3);
+}
+
+fn print_summary_table(lints: &[LintWarnings]) {
+    println!("| Lint                                       | Added   | Removed | Changed |");
+    println!("| ------------------------------------------ | ------: | ------: | ------: |");
+
+    for lint in lints {
+        println!(
+            "| {:<62} | {:>7} | {:>7} | {:>7} |",
+            format!("[`{}`](#user-content-{})", lint.name, to_html_id(&lint.name)),
+            lint.added.len(),
+            lint.removed.len(),
+            lint.changed.len()
+        );
+    }
+}
+
+fn print_warnings(title: &str, warnings: &[LintJson], truncate_after: usize) {
     if warnings.is_empty() {
         return;
     }
 
-    //We have to use HTML here to be able to manually add an id.
-    println!(r#"<h3 id="{title}">{title}</h3>"#);
+    print_h3(&warnings[0].lint, title);
     println!();
+
+    let warnings = truncate(warnings, truncate_after);
+
     for warning in warnings {
         println!("{}", warning.info_text(title));
         println!();
@@ -70,14 +177,16 @@ fn print_warnings(title: &str, warnings: &[LintJson]) {
     }
 }
 
-fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
+fn print_changed_diff(changed: &[(LintJson, LintJson)], truncate_after: usize) {
     if changed.is_empty() {
         return;
     }
 
-    //We have to use HTML here to be able to manually add an id.
-    println!(r#"<h3 id="changed">Changed</h3>"#);
+    print_h3(&changed[0].0.lint, "Changed");
     println!();
+
+    let changed = truncate(changed, truncate_after);
+
     for (old, new) in changed {
         println!("{}", new.info_text("Changed"));
         println!();
@@ -101,51 +210,44 @@ fn print_changed_diff(changed: &[(LintJson, LintJson)]) {
     }
 }
 
-pub(crate) fn diff(old_path: &Path, new_path: &Path) {
-    let old_warnings = load_warnings(old_path);
-    let new_warnings = load_warnings(new_path);
+fn truncate<T>(list: &[T], truncate_after: usize) -> &[T] {
+    if list.len() > truncate_after {
+        println!(
+            "{} warnings have been truncated for this summary.",
+            list.len() - truncate_after
+        );
+        println!();
 
-    let mut added = Vec::new();
-    let mut removed = Vec::new();
-    let mut changed = Vec::new();
-
-    for change in itertools::merge_join_by(old_warnings, new_warnings, |old, new| old.key().cmp(&new.key())) {
-        match change {
-            EitherOrBoth::Both(old, new) => {
-                if old.rendered != new.rendered {
-                    changed.push((old, new));
-                }
-            },
-            EitherOrBoth::Left(old) => removed.push(old),
-            EitherOrBoth::Right(new) => added.push(new),
-        }
+        list.split_at(truncate_after).0
+    } else {
+        list
     }
+}
 
-    print!(
-        r##"{}, {}, {}"##,
-        count_string("added", added.len()),
-        count_string("removed", removed.len()),
-        count_string("changed", changed.len()),
-    );
-    println!();
-    println!();
+fn print_h3(lint: &str, title: &str) {
+    let html_id = to_html_id(lint);
+    // We have to use HTML here to be able to manually add an id.
+    println!(r#"### {title} <a id="user-content-{html_id}-{title}"></a>"#);
+}
 
-    print_warnings("Added", &added);
-    print_warnings("Removed", &removed);
-    print_changed_diff(&changed);
+/// GitHub's markdown parsers doesn't like IDs with `::` and `_`. This simplifies
+/// the lint name for the HTML ID.
+fn to_html_id(lint_name: &str) -> String {
+    lint_name.replace("clippy::", "").replace('_', "-")
 }
 
 /// This generates the `x added` string for the start of the job summery.
 /// It linkifies them if possible to jump to the respective heading.
-fn count_string(label: &str, count: usize) -> String {
+fn count_string(lint: &str, label: &str, count: usize) -> String {
     // Headlines are only added, if anything will be displayed under the headline.
     // We therefore only want to add links to them if they exist
     if count == 0 {
         format!("0 {label}")
     } else {
+        let html_id = to_html_id(lint);
         // GitHub's job summaries don't add HTML ids to headings. That's why we
         // manually have to add them. GitHub prefixes these manual ids with
         // `user-content-` and that's how we end up with these awesome links :D
-        format!("[{count} {label}](#user-content-{label})")
+        format!("[{count} {label}](#user-content-{html_id}-{label})")
     }
 }
diff --git a/lintcheck/src/main.rs b/lintcheck/src/main.rs
index db1ecb9e663..0dd62ded293 100644
--- a/lintcheck/src/main.rs
+++ b/lintcheck/src/main.rs
@@ -260,7 +260,7 @@ fn main() {
     let config = LintcheckConfig::new();
 
     match config.subcommand {
-        Some(Commands::Diff { old, new }) => json::diff(&old, &new),
+        Some(Commands::Diff { old, new, truncate }) => json::diff(&old, &new, truncate),
         Some(Commands::Popular { output, number }) => popular_crates::fetch(output, number).unwrap(),
         None => lintcheck(config),
     }
author	bors <bors@rust-lang.org>	2024-07-25 12:11:35 +0000
committer	bors <bors@rust-lang.org>	2024-07-25 12:11:35 +0000
commit	be8234098b731bcac17304333c62fba8a525da77 (patch)
tree	e97df1a78ffc17d1d069911f9dbc6731c0428766
parent	5e6540f04979062a9bd433420674149909e0cd87 (diff)
parent	bdf3e585a3c04eb2f40ae3dbc8815b683cb55eae (diff)
download	rust-be8234098b731bcac17304333c62fba8a525da77.tar.gz rust-be8234098b731bcac17304333c62fba8a525da77.zip