diff options
| author | Guillaume Gomez <guillaume1.gomez@gmail.com> | 2021-04-23 16:43:18 +0200 |
|---|---|---|
| committer | Guillaume Gomez <guillaume1.gomez@gmail.com> | 2021-06-28 18:05:15 +0200 |
| commit | 83a2bc31b9f68d8ba5fe2854bf38df5e564c575b (patch) | |
| tree | 6c0a994f3275f773f35c442d10a3aee1f3607136 /src/tools/html-checker | |
| parent | 451e98e7b02c8bf5e3bd5c9e780d51f7986a4408 (diff) | |
| download | rust-83a2bc31b9f68d8ba5fe2854bf38df5e564c575b.tar.gz rust-83a2bc31b9f68d8ba5fe2854bf38df5e564c575b.zip | |
Add new tool to check HTML:
* Make html-checker run by default on rust compiler docs as well
* Ensure html-checker is run on CI
* Lazify tidy binary presence check
Diffstat (limited to 'src/tools/html-checker')
| -rw-r--r-- | src/tools/html-checker/Cargo.toml | 12 | ||||
| -rw-r--r-- | src/tools/html-checker/main.rs | 96 |
2 files changed, 108 insertions, 0 deletions
diff --git a/src/tools/html-checker/Cargo.toml b/src/tools/html-checker/Cargo.toml new file mode 100644 index 00000000000..fe35df823b6 --- /dev/null +++ b/src/tools/html-checker/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "html-checker" +version = "0.1.0" +authors = ["Guillaume Gomez <guillaume1.gomez@gmail.com>"] +edition = "2018" + +[[bin]] +name = "html-checker" +path = "main.rs" + +[dependencies] +walkdir = "2" diff --git a/src/tools/html-checker/main.rs b/src/tools/html-checker/main.rs new file mode 100644 index 00000000000..a93191191cc --- /dev/null +++ b/src/tools/html-checker/main.rs @@ -0,0 +1,96 @@ +use std::env; +use std::path::Path; +use std::process::{Command, Output}; + +fn check_html_file(file: &Path) -> usize { + let to_mute = &[ + // "disabled" on <link> or "autocomplete" on <select> emit this warning + "PROPRIETARY_ATTRIBUTE", + // It complains when multiple in the same page link to the same anchor for some reason... + "ANCHOR_NOT_UNIQUE", + // If a <span> contains only HTML elements and no text, it complains about it. + "TRIM_EMPTY_ELEMENT", + // FIXME: the three next warnings are about <pre> elements which are not supposed to + // contain HTML. The solution here would be to replace them with a <div> with + // "" + "MISSING_ENDTAG_BEFORE", + "INSERTING_TAG", + "DISCARDING_UNEXPECTED", + // FIXME: mdbook repeats the name attribute on <input>. When the fix is merged upstream, + // this warning can be used again. + "REPEATED_ATTRIBUTE", + // FIXME: mdbook uses "align" attribute on <td>, which is not allowed. + "MISMATCHED_ATTRIBUTE_WARN", + // FIXME: mdbook doesn't add "alt" attribute on images. + "MISSING_ATTRIBUTE", + // FIXME: mdbook doesn't escape `&` (in "&String" for example). + "UNKNOWN_ENTITY", + // Compiler docs have some inlined <style> in the markdown. 
+ "MOVED_STYLE_TO_HEAD", + ]; + let to_mute_s = to_mute.join(","); + let mut command = Command::new("tidy"); + command + .arg("-errors") + .arg("-quiet") + .arg("--mute-id") // this option is useful in case we want to mute more warnings + .arg("yes") + .arg("--mute") + .arg(&to_mute_s) + .arg(file); + + let Output { status, stderr, .. } = command.output().expect("failed to run tidy command"); + if status.success() { + 0 + } else { + let stderr = String::from_utf8(stderr).expect("String::from_utf8 failed..."); + if stderr.is_empty() && status.code() != Some(2) { + 0 + } else { + eprintln!( + "=> Errors for `{}` (error code: {}) <=", + file.display(), + status.code().unwrap_or(-1) + ); + eprintln!("{}", stderr); + stderr.lines().count() + } + } +} + +// Returns the number of files read and the number of errors. +fn find_all_html_files(dir: &Path) -> (usize, usize) { + let mut files_read = 0; + let mut errors = 0; + + for entry in walkdir::WalkDir::new(dir) { + let entry = entry.expect("failed to read file"); + if !entry.file_type().is_file() { + continue; + } + let entry = entry.path(); + if entry.extension().and_then(|s| s.to_str()) == Some("html") { + errors += check_html_file(&entry); + files_read += 1; + } + } + (files_read, errors) +} + +fn main() -> Result<(), String> { + let args = env::args().collect::<Vec<_>>(); + if args.len() != 2 { + return Err(format!("Usage: {} <doc folder>", args[0])); + } + + println!("Running HTML checker..."); + + let (files_read, errors) = find_all_html_files(&Path::new(&args[1])); + println!("Done! Read {} files...", files_read); + if errors > 0 { + Err(format!("HTML check failed: {} errors", errors)) + } else { + println!("No error found!"); + Ok(()) + } +} |
