about summary refs log tree commit diff
diff options
context:
space:
mode:
authorManish Goregaokar <manishsmail@gmail.com>2016-07-02 15:46:49 +0530
committerGitHub <noreply@github.com>2016-07-02 15:46:49 +0530
commit533ce9a7a69b60a191a9d0598e3072db0552e5f7 (patch)
treec2fc5487de76fa0f32cf45f142e682254ea96759
parent76705df226f0a172531d3405fa85689bdf7547f7 (diff)
parent01386e649265a71d9d91cf9ec138d6f046dc438b (diff)
downloadrust-533ce9a7a69b60a191a9d0598e3072db0552e5f7.tar.gz
rust-533ce9a7a69b60a191a9d0598e3072db0552e5f7.zip
Rollup merge of #34566 - ollie27:linkchecker_invalid_urls, r=alexcrichton
Reject invalid URLs in linkchecker

For example, root-relative links will now be rejected.

Also remove some exceptions which have since been fixed and fix a typo in
the broken redirect handling.
-rw-r--r--src/tools/linkchecker/main.rs43
1 file changed, 20 insertions, 23 deletions
diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs
index 4b74833eaf7..80c37d55975 100644
--- a/src/tools/linkchecker/main.rs
+++ b/src/tools/linkchecker/main.rs
@@ -138,22 +138,6 @@ fn check(cache: &mut Cache,
         return None;
     }
 
-    if file.ends_with("std/sys/ext/index.html") {
-        return None;
-    }
-
-    if let Some(file) = file.to_str() {
-        // FIXME(#31948)
-        if file.contains("ParseFloatError") {
-            return None;
-        }
-        // weird reexports, but this module is on its way out, so chalk it up to
-        // "rustdoc weirdness" and move on from there
-        if file.contains("scoped_tls") {
-            return None;
-        }
-    }
-
     let mut parser = UrlParser::new();
     parser.base_url(base);
 
@@ -170,12 +154,24 @@ fn check(cache: &mut Cache,
 
     // Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
     with_attrs_in_source(&contents, " href", |url, i| {
+        // Ignore external URLs
+        if url.starts_with("http:") || url.starts_with("https:") ||
+           url.starts_with("javascript:") || url.starts_with("ftp:") ||
+           url.starts_with("irc:") || url.starts_with("data:") {
+            return;
+        }
         // Once we've plucked out the URL, parse it using our base url and
-        // then try to extract a file path. If either of these fail then we
-        // just keep going.
+        // then try to extract a file path.
         let (parsed_url, path) = match url_to_file_path(&parser, url) {
             Some((url, path)) => (url, PathBuf::from(path)),
-            None => return,
+            None => {
+                *errors = true;
+                println!("{}:{}: invalid link - {}",
+                         pretty_file.display(),
+                         i + 1,
+                         url);
+                return;
+            }
         };
 
         // Alright, if we've found a file name then this file had better
@@ -197,10 +193,11 @@ fn check(cache: &mut Cache,
                 Ok(res) => res,
                 Err(LoadError::IOError(err)) => panic!(format!("{}", err)),
                 Err(LoadError::BrokenRedirect(target, _)) => {
-                    print!("{}:{}: broken redirect to {}",
-                           pretty_file.display(),
-                           i + 1,
-                           target.display());
+                    *errors = true;
+                    println!("{}:{}: broken redirect to {}",
+                             pretty_file.display(),
+                             i + 1,
+                             target.display());
                     return;
                 }
                 Err(LoadError::IsRedirect) => unreachable!(),