about summary refs log tree commit diff
diff options
context:
space:
mode:
author Matthias Krüger <matthias.krueger@famsik.de> 2022-08-02 23:07:44 +0200
committer GitHub <noreply@github.com> 2022-08-02 23:07:44 +0200
commit 63cd10154dac82503c446baf8eb1b2abd3dac3ff (patch)
tree fcf3bb727fbcd8c8242a175052210af7ea6f2d6f
parent 4493a0f4724c0bae1436242d76cccc9c0a287b80 (diff)
parent 5b0ec1ebe4da106c18ce1ceec76e4adc627bddd1 (diff)
download rust-63cd10154dac82503c446baf8eb1b2abd3dac3ff.tar.gz
download rust-63cd10154dac82503c446baf8eb1b2abd3dac3ff.zip
Rollup merge of #99933 - alex:parallel-html-checking, r=Mark-Simulacrum
parallelize HTML checking tool

There's a lot of I/O, so timings on my laptop are far from stable, but it seems to be considerably faster.

This step often appears to take 5+ minutes in CI, so hopefully this offers a speedup.
-rw-r--r-- Cargo.lock 1
-rw-r--r-- src/tools/html-checker/Cargo.toml 1
-rw-r--r-- src/tools/html-checker/main.rs 46
3 files changed, 27 insertions, 21 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 058b6198dc4..b07da064404 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1788,6 +1788,7 @@ dependencies = [
 name = "html-checker"
 version = "0.1.0"
 dependencies = [
+ "rayon",
  "walkdir",
 ]
 
diff --git a/src/tools/html-checker/Cargo.toml b/src/tools/html-checker/Cargo.toml
index 34d3954db28..72d61d9bd26 100644
--- a/src/tools/html-checker/Cargo.toml
+++ b/src/tools/html-checker/Cargo.toml
@@ -9,3 +9,4 @@ path = "main.rs"
 
 [dependencies]
 walkdir = "2"
+rayon = "1.5"
diff --git a/src/tools/html-checker/main.rs b/src/tools/html-checker/main.rs
index f52fbdfe2d7..9b4d2c52598 100644
--- a/src/tools/html-checker/main.rs
+++ b/src/tools/html-checker/main.rs
@@ -1,3 +1,4 @@
+use rayon::iter::{ParallelBridge, ParallelIterator};
 use std::env;
 use std::path::Path;
 use std::process::{Command, Output};
@@ -56,27 +57,30 @@ const DOCS_TO_CHECK: &[&str] =
 
 // Returns the number of files read and the number of errors.
 fn find_all_html_files(dir: &Path) -> (usize, usize) {
-    let mut files_read = 0;
-    let mut errors = 0;
-
-    for entry in walkdir::WalkDir::new(dir).into_iter().filter_entry(|e| {
-        e.depth() != 1
-            || e.file_name()
-                .to_str()
-                .map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s))
-                .unwrap_or(false)
-    }) {
-        let entry = entry.expect("failed to read file");
-        if !entry.file_type().is_file() {
-            continue;
-        }
-        let entry = entry.path();
-        if entry.extension().and_then(|s| s.to_str()) == Some("html") {
-            errors += check_html_file(&entry);
-            files_read += 1;
-        }
-    }
-    (files_read, errors)
+    walkdir::WalkDir::new(dir)
+        .into_iter()
+        .filter_entry(|e| {
+            e.depth() != 1
+                || e.file_name()
+                    .to_str()
+                    .map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s))
+                    .unwrap_or(false)
+        })
+        .par_bridge()
+        .map(|entry| {
+            let entry = entry.expect("failed to read file");
+            if !entry.file_type().is_file() {
+                return (0, 0);
+            }
+            let entry = entry.path();
+            // (Number of files processed, number of errors)
+            if entry.extension().and_then(|s| s.to_str()) == Some("html") {
+                (1, check_html_file(&entry))
+            } else {
+                (0, 0)
+            }
+        })
+        .reduce(|| (0, 0), |a, b| (a.0 + b.0, a.1 + b.1))
 }
 
 /// Default `tidy` command for macOS is too old that it does not have `mute-id` and `mute` options.