about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--src/tools/linkchecker/main.rs77
1 files changed, 52 insertions, 25 deletions
diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs
index 4b45692e849..1dc45728c90 100644
--- a/src/tools/linkchecker/main.rs
+++ b/src/tools/linkchecker/main.rs
@@ -17,9 +17,10 @@
 //! should catch the majority of "broken link" cases.
 
 use std::cell::{Cell, RefCell};
+use std::collections::hash_map::Entry;
 use std::collections::{HashMap, HashSet};
 use std::fs;
-use std::io::ErrorKind;
+use std::iter::once;
 use std::path::{Component, Path, PathBuf};
 use std::rc::Rc;
 use std::time::Instant;
@@ -112,6 +113,7 @@ macro_rules! t {
 
 struct Cli {
     docs: PathBuf,
+    link_targets_dirs: Vec<PathBuf>,
 }
 
 fn main() {
@@ -123,7 +125,11 @@ fn main() {
         }
     };
 
-    let mut checker = Checker { root: cli.docs.clone(), cache: HashMap::new() };
+    let mut checker = Checker {
+        root: cli.docs.clone(),
+        link_targets_dirs: cli.link_targets_dirs,
+        cache: HashMap::new(),
+    };
     let mut report = Report {
         errors: 0,
         start: Instant::now(),
@@ -144,12 +150,13 @@ fn main() {
 }
 
 fn parse_cli() -> Result<Cli, String> {
-    fn to_canonical_path(arg: &str) -> Result<PathBuf, String> {
-        PathBuf::from(arg).canonicalize().map_err(|e| format!("could not canonicalize {arg}: {e}"))
+    fn to_absolute_path(arg: &str) -> Result<PathBuf, String> {
+        std::path::absolute(arg).map_err(|e| format!("could not convert to absolute {arg}: {e}"))
     }
 
     let mut verbatim = false;
     let mut docs = None;
+    let mut link_targets_dirs = Vec::new();
 
     let mut args = std::env::args().skip(1);
     while let Some(arg) = args.next() {
@@ -157,6 +164,12 @@ fn parse_cli() -> Result<Cli, String> {
             verbatim = true;
         } else if !verbatim && (arg == "-h" || arg == "--help") {
             usage_and_exit(0)
+        } else if !verbatim && arg == "--link-targets-dir" {
+            link_targets_dirs.push(to_absolute_path(
+                &args.next().ok_or("missing value for --link-targets-dir")?,
+            )?);
+        } else if !verbatim && let Some(value) = arg.strip_prefix("--link-targets-dir=") {
+            link_targets_dirs.push(to_absolute_path(value)?);
         } else if !verbatim && arg.starts_with('-') {
             return Err(format!("unknown flag: {arg}"));
         } else if docs.is_none() {
@@ -166,16 +179,20 @@ fn parse_cli() -> Result<Cli, String> {
         }
     }
 
-    Ok(Cli { docs: to_canonical_path(&docs.ok_or("missing first positional argument")?)? })
+    Ok(Cli {
+        docs: to_absolute_path(&docs.ok_or("missing first positional argument")?)?,
+        link_targets_dirs,
+    })
 }
 
 fn usage_and_exit(code: i32) -> ! {
-    eprintln!("usage: linkchecker <path>");
+    eprintln!("usage: linkchecker PATH [--link-targets-dir=PATH ...]");
     std::process::exit(code)
 }
 
 struct Checker {
     root: PathBuf,
+    link_targets_dirs: Vec<PathBuf>,
     cache: Cache,
 }
 
@@ -461,37 +478,34 @@ impl Checker {
 
     /// Load a file from disk, or from the cache if available.
     fn load_file(&mut self, file: &Path, report: &mut Report) -> (String, &FileEntry) {
-        // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
-        #[cfg(windows)]
-        const ERROR_INVALID_NAME: i32 = 123;
-
         let pretty_path =
             file.strip_prefix(&self.root).unwrap_or(file).to_str().unwrap().to_string();
 
-        let entry =
-            self.cache.entry(pretty_path.clone()).or_insert_with(|| match fs::metadata(file) {
+        for base in once(&self.root).chain(self.link_targets_dirs.iter()) {
+            let entry = self.cache.entry(pretty_path.clone());
+            if let Entry::Occupied(e) = &entry
+                && !matches!(e.get(), FileEntry::Missing)
+            {
+                break;
+            }
+
+            let file = base.join(&pretty_path);
+            entry.insert_entry(match fs::metadata(&file) {
                 Ok(metadata) if metadata.is_dir() => FileEntry::Dir,
                 Ok(_) => {
                     if file.extension().and_then(|s| s.to_str()) != Some("html") {
                         FileEntry::OtherFile
                     } else {
                         report.html_files += 1;
-                        load_html_file(file, report)
+                        load_html_file(&file, report)
                     }
                 }
-                Err(e) if e.kind() == ErrorKind::NotFound => FileEntry::Missing,
-                Err(e) => {
-                    // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`.
-                    // Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`.
-                    #[cfg(windows)]
-                    if e.raw_os_error() == Some(ERROR_INVALID_NAME)
-                        && file.as_os_str().to_str().map_or(false, |s| s.contains("::"))
-                    {
-                        return FileEntry::Missing;
-                    }
-                    panic!("unexpected read error for {}: {}", file.display(), e);
-                }
+                Err(e) if is_not_found_error(&file, &e) => FileEntry::Missing,
+                Err(e) => panic!("unexpected read error for {}: {}", file.display(), e),
             });
+        }
+
+        let entry = self.cache.get(&pretty_path).unwrap();
         (pretty_path, entry)
     }
 }
@@ -670,3 +684,16 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R
         ids.insert(encoded);
     }
 }
+
+fn is_not_found_error(path: &Path, error: &std::io::Error) -> bool {
+    // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
+    const WINDOWS_ERROR_INVALID_NAME: i32 = 123;
+
+    error.kind() == std::io::ErrorKind::NotFound
+        // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME`
+        // rather than `NotFound`. Explicitly check for that so that the broken link can be allowed
+        // in `LINKCHECK_EXCEPTIONS`.
+        || (cfg!(windows)
+            && error.raw_os_error() == Some(WINDOWS_ERROR_INVALID_NAME)
+            && path.as_os_str().to_str().map_or(false, |s| s.contains("::")))
+}