about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorStuart Cook <Zalathar@users.noreply.github.com>2025-09-09 14:35:03 +1000
committerGitHub <noreply@github.com>2025-09-09 14:35:03 +1000
commit3bd603b2394a7d98426ec9263eae08a418544e92 (patch)
tree63ffd19e1192a7182af8ef0f28f3eccc4fb29e71 /src
parentc5a62b80587ef3c48245990ad5cd8f02518b2565 (diff)
parent8b58777968a4c663d6c1293bacff6da99e6e5203 (diff)
downloadrust-3bd603b2394a7d98426ec9263eae08a418544e92.tar.gz
rust-3bd603b2394a7d98426ec9263eae08a418544e92.zip
Rollup merge of #146195 - nixxo:urlencoding-fix, r=ehuss
fix partial urlencoded link support

Hello Rust community.
This is my first contribution; I hope it is useful.

While translating the Rust book into Italian (https://github.com/nixxo/rust-lang-book-it), I noticed that the linkchecker tool was failing, reporting broken links on some pages even though the links worked properly in the browser. Upon inspection, I noticed that mdbook URL-encodes the links but does not URL-encode the heading IDs, resulting in a mismatched anchor/ID pairing that linkchecker reports as invalid.

Looking at the source code for the linkchecker tool, I noticed that URL encoding was done by the `small_url_encode` function only partially, as the name suggests. Replacing its implementation with full URL encoding fixes the issue, and the links are properly reported as valid.

- added full urlencoding to properly check urlencoded anchor links against non-urlencoded heading IDs
- added tests

URL encoding is provided by the `urlencoding` crate: https://crates.io/crates/urlencoding
Diffstat (limited to 'src')
-rw-r--r--src/tools/linkchecker/Cargo.toml1
-rw-r--r--src/tools/linkchecker/main.rs13
-rw-r--r--src/tools/linkchecker/tests/valid/inner/bar.html3
-rw-r--r--src/tools/linkchecker/tests/valid/inner/foo.html8
-rw-r--r--src/tools/linkchecker/tests/valid/inner/redir-target.html3
5 files changed, 16 insertions, 12 deletions
diff --git a/src/tools/linkchecker/Cargo.toml b/src/tools/linkchecker/Cargo.toml
index fb5bff3fe63..f0886e31b24 100644
--- a/src/tools/linkchecker/Cargo.toml
+++ b/src/tools/linkchecker/Cargo.toml
@@ -10,3 +10,4 @@ path = "main.rs"
 [dependencies]
 regex = "1"
 html5ever = "0.29.0"
+urlencoding = "2.1.3"
diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs
index 1dc45728c90..e07a0784cdb 100644
--- a/src/tools/linkchecker/main.rs
+++ b/src/tools/linkchecker/main.rs
@@ -232,18 +232,7 @@ enum FileEntry {
 type Cache = HashMap<String, FileEntry>;
 
 fn small_url_encode(s: &str) -> String {
-    s.replace('<', "%3C")
-        .replace('>', "%3E")
-        .replace(' ', "%20")
-        .replace('?', "%3F")
-        .replace('\'', "%27")
-        .replace('&', "%26")
-        .replace(',', "%2C")
-        .replace(':', "%3A")
-        .replace(';', "%3B")
-        .replace('[', "%5B")
-        .replace(']', "%5D")
-        .replace('\"', "%22")
+    urlencoding::encode(s).to_string()
 }
 
 impl Checker {
diff --git a/src/tools/linkchecker/tests/valid/inner/bar.html b/src/tools/linkchecker/tests/valid/inner/bar.html
index 4b500d78b76..6ffda259c40 100644
--- a/src/tools/linkchecker/tests/valid/inner/bar.html
+++ b/src/tools/linkchecker/tests/valid/inner/bar.html
@@ -3,5 +3,8 @@
 
   <h2 id="barfrag">Bar</h2>
 
+  <!-- testing urlecoded anchor link against a non-urlencoded heading IDs -->
+  <h2 id="barfrag-è">Bar</h2>
+
 </body>
 </html>
diff --git a/src/tools/linkchecker/tests/valid/inner/foo.html b/src/tools/linkchecker/tests/valid/inner/foo.html
index 3c6a7483bcd..f30bf718205 100644
--- a/src/tools/linkchecker/tests/valid/inner/foo.html
+++ b/src/tools/linkchecker/tests/valid/inner/foo.html
@@ -8,7 +8,15 @@
   <a href="https://example.com/doesnotexist">external links not validated</a>
   <a href="redir.html#redirfrag">Redirect</a>
 
+  <!-- testing urlecoded anchor link against a non-urlencoded heading IDs -->
+  <a href="#localfrag-%C3%A8"></a>
+  <a href="bar.html#barfrag-%C3%A8"></a>
+  <a href="redir.html#redirfrag-%C3%A8"></a>
+
   <h2 id="localfrag">Local</h2>
 
+  <!-- testing urlecoded anchor link against a non-urlencoded heading IDs -->
+  <h2 id="localfrag-è">Local</h2>
+
 </body>
 </html>
diff --git a/src/tools/linkchecker/tests/valid/inner/redir-target.html b/src/tools/linkchecker/tests/valid/inner/redir-target.html
index bd59884a01e..ac1dec6d5b4 100644
--- a/src/tools/linkchecker/tests/valid/inner/redir-target.html
+++ b/src/tools/linkchecker/tests/valid/inner/redir-target.html
@@ -1,5 +1,8 @@
 <html>
 <body>
   <h2 id="redirfrag">Redir</h2>
+
+  <!-- testing urlecoded anchor link against a non-urlencoded heading IDs -->
+  <h2 id="redirfrag-è">Redir</h2>
 </body>
 </html>