about summary refs log tree commit diff
path: root/src/tools/linkchecker/main.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools/linkchecker/main.rs')
-rw-r--r--src/tools/linkchecker/main.rs118
1 files changed, 93 insertions, 25 deletions
diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs
index 84cba3f8c44..1dc45728c90 100644
--- a/src/tools/linkchecker/main.rs
+++ b/src/tools/linkchecker/main.rs
@@ -17,12 +17,13 @@
 //! should catch the majority of "broken link" cases.
 
 use std::cell::{Cell, RefCell};
+use std::collections::hash_map::Entry;
 use std::collections::{HashMap, HashSet};
-use std::io::ErrorKind;
+use std::fs;
+use std::iter::once;
 use std::path::{Component, Path, PathBuf};
 use std::rc::Rc;
 use std::time::Instant;
-use std::{env, fs};
 
 use html5ever::tendril::ByteTendril;
 use html5ever::tokenizer::{
@@ -110,10 +111,25 @@ macro_rules! t {
     };
 }
 
+struct Cli {
+    docs: PathBuf,
+    link_targets_dirs: Vec<PathBuf>,
+}
+
 fn main() {
-    let docs = env::args_os().nth(1).expect("doc path should be first argument");
-    let docs = env::current_dir().unwrap().join(docs);
-    let mut checker = Checker { root: docs.clone(), cache: HashMap::new() };
+    let cli = match parse_cli() {
+        Ok(cli) => cli,
+        Err(err) => {
+            eprintln!("error: {err}");
+            usage_and_exit(1);
+        }
+    };
+
+    let mut checker = Checker {
+        root: cli.docs.clone(),
+        link_targets_dirs: cli.link_targets_dirs,
+        cache: HashMap::new(),
+    };
     let mut report = Report {
         errors: 0,
         start: Instant::now(),
@@ -125,7 +141,7 @@ fn main() {
         intra_doc_exceptions: 0,
         has_broken_urls: false,
     };
-    checker.walk(&docs, &mut report);
+    checker.walk(&cli.docs, &mut report);
     report.report();
     if report.errors != 0 {
         println!("found some broken links");
@@ -133,8 +149,50 @@ fn main() {
     }
 }
 
+fn parse_cli() -> Result<Cli, String> {
+    fn to_absolute_path(arg: &str) -> Result<PathBuf, String> {
+        std::path::absolute(arg).map_err(|e| format!("could not convert to absolute {arg}: {e}"))
+    }
+
+    let mut verbatim = false;
+    let mut docs = None;
+    let mut link_targets_dirs = Vec::new();
+
+    let mut args = std::env::args().skip(1);
+    while let Some(arg) = args.next() {
+        if !verbatim && arg == "--" {
+            verbatim = true;
+        } else if !verbatim && (arg == "-h" || arg == "--help") {
+            usage_and_exit(0)
+        } else if !verbatim && arg == "--link-targets-dir" {
+            link_targets_dirs.push(to_absolute_path(
+                &args.next().ok_or("missing value for --link-targets-dir")?,
+            )?);
+        } else if !verbatim && let Some(value) = arg.strip_prefix("--link-targets-dir=") {
+            link_targets_dirs.push(to_absolute_path(value)?);
+        } else if !verbatim && arg.starts_with('-') {
+            return Err(format!("unknown flag: {arg}"));
+        } else if docs.is_none() {
+            docs = Some(arg);
+        } else {
+            return Err("too many positional arguments".into());
+        }
+    }
+
+    Ok(Cli {
+        docs: to_absolute_path(&docs.ok_or("missing first positional argument")?)?,
+        link_targets_dirs,
+    })
+}
+
+fn usage_and_exit(code: i32) -> ! {
+    eprintln!("usage: linkchecker PATH [--link-targets-dir=PATH ...]");
+    std::process::exit(code)
+}
+
 struct Checker {
     root: PathBuf,
+    link_targets_dirs: Vec<PathBuf>,
     cache: Cache,
 }
 
@@ -420,37 +478,34 @@ impl Checker {
 
     /// Load a file from disk, or from the cache if available.
     fn load_file(&mut self, file: &Path, report: &mut Report) -> (String, &FileEntry) {
-        // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
-        #[cfg(windows)]
-        const ERROR_INVALID_NAME: i32 = 123;
-
         let pretty_path =
             file.strip_prefix(&self.root).unwrap_or(file).to_str().unwrap().to_string();
 
-        let entry =
-            self.cache.entry(pretty_path.clone()).or_insert_with(|| match fs::metadata(file) {
+        for base in once(&self.root).chain(self.link_targets_dirs.iter()) {
+            let entry = self.cache.entry(pretty_path.clone());
+            if let Entry::Occupied(e) = &entry
+                && !matches!(e.get(), FileEntry::Missing)
+            {
+                break;
+            }
+
+            let file = base.join(&pretty_path);
+            entry.insert_entry(match fs::metadata(&file) {
                 Ok(metadata) if metadata.is_dir() => FileEntry::Dir,
                 Ok(_) => {
                     if file.extension().and_then(|s| s.to_str()) != Some("html") {
                         FileEntry::OtherFile
                     } else {
                         report.html_files += 1;
-                        load_html_file(file, report)
+                        load_html_file(&file, report)
                     }
                 }
-                Err(e) if e.kind() == ErrorKind::NotFound => FileEntry::Missing,
-                Err(e) => {
-                    // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`.
-                    // Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`.
-                    #[cfg(windows)]
-                    if e.raw_os_error() == Some(ERROR_INVALID_NAME)
-                        && file.as_os_str().to_str().map_or(false, |s| s.contains("::"))
-                    {
-                        return FileEntry::Missing;
-                    }
-                    panic!("unexpected read error for {}: {}", file.display(), e);
-                }
+                Err(e) if is_not_found_error(&file, &e) => FileEntry::Missing,
+                Err(e) => panic!("unexpected read error for {}: {}", file.display(), e),
             });
+        }
+
+        let entry = self.cache.get(&pretty_path).unwrap();
         (pretty_path, entry)
     }
 }
@@ -629,3 +684,16 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
         ids.insert(encoded);
     }
 }
+
+fn is_not_found_error(path: &Path, error: &std::io::Error) -> bool {
+    // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
+    const WINDOWS_ERROR_INVALID_NAME: i32 = 123;
+
+    error.kind() == std::io::ErrorKind::NotFound
+        // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME`
+        // rather than `NotFound`. Explicitly check for that so that the broken link can be allowed
+        // in `LINKCHECK_EXCEPTIONS`.
+        || (cfg!(windows)
+            && error.raw_os_error() == Some(WINDOWS_ERROR_INVALID_NAME)
+            && path.as_os_str().to_str().map_or(false, |s| s.contains("::")))
+}