diff options
| -rw-r--r-- | src/tools/linkchecker/main.rs | 77 |
1 files changed, 52 insertions, 25 deletions
diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 4b45692e849..1dc45728c90 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -17,9 +17,10 @@ //! should catch the majority of "broken link" cases. use std::cell::{Cell, RefCell}; +use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::fs; -use std::io::ErrorKind; +use std::iter::once; use std::path::{Component, Path, PathBuf}; use std::rc::Rc; use std::time::Instant; @@ -112,6 +113,7 @@ macro_rules! t { struct Cli { docs: PathBuf, + link_targets_dirs: Vec<PathBuf>, } fn main() { @@ -123,7 +125,11 @@ fn main() { } }; - let mut checker = Checker { root: cli.docs.clone(), cache: HashMap::new() }; + let mut checker = Checker { + root: cli.docs.clone(), + link_targets_dirs: cli.link_targets_dirs, + cache: HashMap::new(), + }; let mut report = Report { errors: 0, start: Instant::now(), @@ -144,12 +150,13 @@ fn main() { } fn parse_cli() -> Result<Cli, String> { - fn to_canonical_path(arg: &str) -> Result<PathBuf, String> { - PathBuf::from(arg).canonicalize().map_err(|e| format!("could not canonicalize {arg}: {e}")) + fn to_absolute_path(arg: &str) -> Result<PathBuf, String> { + std::path::absolute(arg).map_err(|e| format!("could not convert to absolute {arg}: {e}")) } let mut verbatim = false; let mut docs = None; + let mut link_targets_dirs = Vec::new(); let mut args = std::env::args().skip(1); while let Some(arg) = args.next() { @@ -157,6 +164,12 @@ fn parse_cli() -> Result<Cli, String> { verbatim = true; } else if !verbatim && (arg == "-h" || arg == "--help") { usage_and_exit(0) + } else if !verbatim && arg == "--link-targets-dir" { + link_targets_dirs.push(to_absolute_path( + &args.next().ok_or("missing value for --link-targets-dir")?, + )?); + } else if !verbatim && let Some(value) = arg.strip_prefix("--link-targets-dir=") { + link_targets_dirs.push(to_absolute_path(value)?); } else if !verbatim && arg.starts_with('-') { return Err(format!("unknown flag: {arg}")); } else if docs.is_none() { @@ -166,16 +179,20 @@ fn parse_cli() -> Result<Cli, String> { } } - Ok(Cli { docs: to_canonical_path(&docs.ok_or("missing first positional argument")?)? }) + Ok(Cli { + docs: to_absolute_path(&docs.ok_or("missing first positional argument")?)?, + link_targets_dirs, + }) } fn usage_and_exit(code: i32) -> ! { - eprintln!("usage: linkchecker <path>"); + eprintln!("usage: linkchecker PATH [--link-targets-dir=PATH ...]"); std::process::exit(code) } struct Checker { root: PathBuf, + link_targets_dirs: Vec<PathBuf>, cache: Cache, } @@ -461,37 +478,34 @@ impl Checker { /// Load a file from disk, or from the cache if available. fn load_file(&mut self, file: &Path, report: &mut Report) -> (String, &FileEntry) { - // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499- - #[cfg(windows)] - const ERROR_INVALID_NAME: i32 = 123; - let pretty_path = file.strip_prefix(&self.root).unwrap_or(file).to_str().unwrap().to_string(); - let entry = - self.cache.entry(pretty_path.clone()).or_insert_with(|| match fs::metadata(file) { + for base in once(&self.root).chain(self.link_targets_dirs.iter()) { + let entry = self.cache.entry(pretty_path.clone()); + if let Entry::Occupied(e) = &entry + && !matches!(e.get(), FileEntry::Missing) + { + break; + } + + let file = base.join(&pretty_path); + entry.insert_entry(match fs::metadata(&file) { Ok(metadata) if metadata.is_dir() => FileEntry::Dir, Ok(_) => { if file.extension().and_then(|s| s.to_str()) != Some("html") { FileEntry::OtherFile } else { report.html_files += 1; - load_html_file(file, report) + load_html_file(&file, report) } } - Err(e) if e.kind() == ErrorKind::NotFound => FileEntry::Missing, - Err(e) => { - // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` rather than `NotFound`. - // Explicitly check for that so that the broken link can be allowed in `LINKCHECK_EXCEPTIONS`. - #[cfg(windows)] - if e.raw_os_error() == Some(ERROR_INVALID_NAME) - && file.as_os_str().to_str().map_or(false, |s| s.contains("::")) - { - return FileEntry::Missing; - } - panic!("unexpected read error for {}: {}", file.display(), e); - } + Err(e) if is_not_found_error(&file, &e) => FileEntry::Missing, + Err(e) => panic!("unexpected read error for {}: {}", file.display(), e), }); + } + + let entry = self.cache.get(&pretty_path).unwrap(); (pretty_path, entry) } } @@ -670,3 +684,16 @@ fn parse_ids(ids: &mut HashSet<String>, file: &str, source: &str, report: &mut R ids.insert(encoded); } } + +fn is_not_found_error(path: &Path, error: &std::io::Error) -> bool { + // https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499- + const WINDOWS_ERROR_INVALID_NAME: i32 = 123; + + error.kind() == std::io::ErrorKind::NotFound + // If a broken intra-doc link contains `::`, on windows, it will cause `ERROR_INVALID_NAME` + // rather than `NotFound`. Explicitly check for that so that the broken link can be allowed + // in `LINKCHECK_EXCEPTIONS`. + || (cfg!(windows) + && error.raw_os_error() == Some(WINDOWS_ERROR_INVALID_NAME) + && path.as_os_str().to_str().map_or(false, |s| s.contains("::"))) +} |
