about summary refs log tree commit diff
diff options
context:
space:
mode:
authorOliver Middleton <olliemail27@gmail.com>2017-05-14 17:57:59 +0100
committerOliver Middleton <olliemail27@gmail.com>2017-05-14 18:06:35 +0100
commitd4f20eb7e32bad9049663d30cfcb0e246ef2e031 (patch)
treeadfd3fa354243633be8759e144c5fee8e3d63486
parent21ca9cab7d3a91379005546ac6b0e8c4118e0f34 (diff)
downloadrust-d4f20eb7e32bad9049663d30cfcb0e246ef2e031.tar.gz
rust-d4f20eb7e32bad9049663d30cfcb0e246ef2e031.zip
linkchecker: Add support for <base> tag
Add support for the HTML <base> tag as used by mdBook so The Unstable
Book can be checked.

Also cleanup a few things:
* Stop checking the name attribute. It should never have been used and
mdBook has since been fixed not to use it.
* Make sure we only check html files.
* Remove a few unnecessary allocations.

Finally, dead links in The Unstable Book have been fixed.
-rw-r--r--src/tools/linkchecker/main.rs89
1 files changed, 33 insertions, 56 deletions
diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs
index 3d9a4fba6cd..1b55dc792c2 100644
--- a/src/tools/linkchecker/main.rs
+++ b/src/tools/linkchecker/main.rs
@@ -41,7 +41,7 @@ macro_rules! t {
 }
 
 fn main() {
-    let docs = env::args().nth(1).unwrap();
+    let docs = env::args_os().nth(1).unwrap();
     let docs = env::current_dir().unwrap().join(docs);
     let mut errors = false;
     walk(&mut HashMap::new(), &docs, &docs, &mut errors);
@@ -65,7 +65,6 @@ enum Redirect {
 struct FileEntry {
     source: String,
     ids: HashSet<String>,
-    names: HashSet<String>,
 }
 
 type Cache = HashMap<PathBuf, FileEntry>;
@@ -73,7 +72,7 @@ type Cache = HashMap<PathBuf, FileEntry>;
 impl FileEntry {
     fn parse_ids(&mut self, file: &Path, contents: &str, errors: &mut bool) {
         if self.ids.is_empty() {
-            with_attrs_in_source(contents, " id", |fragment, i| {
+            with_attrs_in_source(contents, " id", |fragment, i, _| {
                 let frag = fragment.trim_left_matches("#").to_owned();
                 if !self.ids.insert(frag) {
                     *errors = true;
@@ -82,15 +81,6 @@ impl FileEntry {
             });
         }
     }
-
-    fn parse_names(&mut self, contents: &str) {
-        if self.names.is_empty() {
-            with_attrs_in_source(contents, " name", |fragment, _| {
-                let frag = fragment.trim_left_matches("#").to_owned();
-                self.names.insert(frag);
-            });
-        }
-    }
 }
 
 fn walk(cache: &mut Cache, root: &Path, dir: &Path, errors: &mut bool) {
@@ -116,15 +106,8 @@ fn check(cache: &mut Cache,
          file: &Path,
          errors: &mut bool)
          -> Option<PathBuf> {
-    // ignore js files as they are not prone to errors as the rest of the
-    // documentation is and they otherwise bring up false positives.
-    if file.extension().and_then(|s| s.to_str()) == Some("js") {
-        return None;
-    }
-
-    // ignore handlebars files as they use {{}} to build links, we only
-    // want to test the generated files
-    if file.extension().and_then(|s| s.to_str()) == Some("hbs") {
+    // Ignore none HTML files.
+    if file.extension().and_then(|s| s.to_str()) != Some("html") {
         return None;
     }
 
@@ -147,13 +130,7 @@ fn check(cache: &mut Cache,
         return None;
     }
 
-    // mdbook uses the HTML <base> tag to handle links for subdirectories, which
-    // linkchecker doesn't support
-    if file.to_str().unwrap().contains("unstable-book") {
-        return None;
-    }
-
-    let res = load_file(cache, root, PathBuf::from(file), SkipRedirect);
+    let res = load_file(cache, root, file, SkipRedirect);
     let (pretty_file, contents) = match res {
         Ok(res) => res,
         Err(_) => return None,
@@ -162,13 +139,10 @@ fn check(cache: &mut Cache,
         cache.get_mut(&pretty_file)
              .unwrap()
              .parse_ids(&pretty_file, &contents, errors);
-        cache.get_mut(&pretty_file)
-             .unwrap()
-             .parse_names(&contents);
     }
 
     // Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
-    with_attrs_in_source(&contents, " href", |url, i| {
+    with_attrs_in_source(&contents, " href", |url, i, base| {
         // Ignore external URLs
         if url.starts_with("http:") || url.starts_with("https:") ||
            url.starts_with("javascript:") || url.starts_with("ftp:") ||
@@ -184,9 +158,9 @@ fn check(cache: &mut Cache,
         // Once we've plucked out the URL, parse it using our base url and
         // then try to extract a file path.
         let mut path = file.to_path_buf();
-        if !url.is_empty() {
+        if !base.is_empty() || !url.is_empty() {
             path.pop();
-            for part in Path::new(url).components() {
+            for part in Path::new(base).join(url).components() {
                 match part {
                     Component::Prefix(_) |
                     Component::RootDir => panic!(),
@@ -197,13 +171,6 @@ fn check(cache: &mut Cache,
             }
         }
 
-        if let Some(extension) = path.extension() {
-            // don't check these files
-            if extension == "png" {
-                return;
-            }
-        }
-
         // Alright, if we've found a file name then this file had better
         // exist! If it doesn't then we register and print an error.
         if path.exists() {
@@ -218,11 +185,17 @@ fn check(cache: &mut Cache,
                          pretty_path.display());
                 return;
             }
-            let res = load_file(cache, root, path.clone(), FromRedirect(false));
+            if let Some(extension) = path.extension() {
+                // Ignore none HTML files.
+                if extension != "html" {
+                    return;
+                }
+            }
+            let res = load_file(cache, root, &path, FromRedirect(false));
             let (pretty_path, contents) = match res {
                 Ok(res) => res,
                 Err(LoadError::IOError(err)) => {
-                    panic!(format!("error loading {}: {}", path.display(), err));
+                    panic!("error loading {}: {}", path.display(), err);
                 }
                 Err(LoadError::BrokenRedirect(target, _)) => {
                     *errors = true;
@@ -245,11 +218,10 @@ fn check(cache: &mut Cache,
 
                 let entry = &mut cache.get_mut(&pretty_path).unwrap();
                 entry.parse_ids(&pretty_path, &contents, errors);
-                entry.parse_names(&contents);
 
-                if !(entry.ids.contains(*fragment) || entry.names.contains(*fragment)) {
+                if !entry.ids.contains(*fragment) {
                     *errors = true;
-                    print!("{}:{}: broken link fragment  ",
+                    print!("{}:{}: broken link fragment ",
                            pretty_file.display(),
                            i + 1);
                     println!("`#{}` pointing to `{}`", fragment, pretty_path.display());
@@ -267,7 +239,7 @@ fn check(cache: &mut Cache,
 
 fn load_file(cache: &mut Cache,
              root: &Path,
-             mut file: PathBuf,
+             file: &Path,
              redirect: Redirect)
              -> Result<(PathBuf, String), LoadError> {
     let mut contents = String::new();
@@ -279,9 +251,9 @@ fn load_file(cache: &mut Cache,
             None
         }
         Entry::Vacant(entry) => {
-            let mut fp = File::open(file.clone()).map_err(|err| {
+            let mut fp = File::open(file).map_err(|err| {
                 if let FromRedirect(true) = redirect {
-                    LoadError::BrokenRedirect(file.clone(), err)
+                    LoadError::BrokenRedirect(file.to_path_buf(), err)
                 } else {
                     LoadError::IOError(err)
                 }
@@ -297,17 +269,14 @@ fn load_file(cache: &mut Cache,
                 entry.insert(FileEntry {
                     source: contents.clone(),
                     ids: HashSet::new(),
-                    names: HashSet::new(),
                 });
             }
             maybe
         }
     };
-    file.pop();
-    match maybe_redirect.map(|url| file.join(url)) {
+    match maybe_redirect.map(|url| file.parent().unwrap().join(url)) {
         Some(redirect_file) => {
-            let path = PathBuf::from(redirect_file);
-            load_file(cache, root, path, FromRedirect(true))
+            load_file(cache, root, &redirect_file, FromRedirect(true))
         }
         None => Ok((pretty_file, contents)),
     }
@@ -329,10 +298,14 @@ fn maybe_redirect(source: &str) -> Option<String> {
     })
 }
 
-fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f: F) {
+fn with_attrs_in_source<F: FnMut(&str, usize, &str)>(contents: &str, attr: &str, mut f: F) {
+    let mut base = "";
     for (i, mut line) in contents.lines().enumerate() {
         while let Some(j) = line.find(attr) {
             let rest = &line[j + attr.len()..];
+            // The base tag should always be the first link in the document so
+            // we can get away with using one pass.
+            let is_base = line[..j].ends_with("<base");
             line = rest;
             let pos_equals = match rest.find("=") {
                 Some(i) => i,
@@ -358,7 +331,11 @@ fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f
                 Some(i) => &rest[..i],
                 None => continue,
             };
-            f(url, i)
+            if is_base {
+                base = url;
+                continue;
+            }
+            f(url, i, base)
         }
     }
 }