diff options
| author | nixxo <nixxo@protonmail.com> | 2025-09-04 09:33:11 +0200 |
|---|---|---|
| committer | Eric Huss <eric@huss.org> | 2025-09-08 11:08:40 -0700 |
| commit | 8b58777968a4c663d6c1293bacff6da99e6e5203 (patch) | |
| tree | 3ce5d4b8957ba97868e330f40e8dfe4df03a89f0 | |
| parent | 033c0a4742794f5608b19eb78458726596f8ec18 (diff) | |
| download | rust-8b58777968a4c663d6c1293bacff6da99e6e5203.tar.gz rust-8b58777968a4c663d6c1293bacff6da99e6e5203.zip | |
fix partial urlencoded link support
- added full urlencoding to properly check urlencoded anchor links against non-urlencoded heading IDs
- added tests

urlencoding provided by https://crates.io/crates/urlencoding
| -rw-r--r-- | Cargo.lock | 7 | ||||
| -rw-r--r-- | src/tools/linkchecker/Cargo.toml | 1 | ||||
| -rw-r--r-- | src/tools/linkchecker/main.rs | 13 | ||||
| -rw-r--r-- | src/tools/linkchecker/tests/valid/inner/bar.html | 3 | ||||
| -rw-r--r-- | src/tools/linkchecker/tests/valid/inner/foo.html | 8 | ||||
| -rw-r--r-- | src/tools/linkchecker/tests/valid/inner/redir-target.html | 3 |
6 files changed, 23 insertions, 12 deletions
diff --git a/Cargo.lock b/Cargo.lock index 52f50481157..fd4b661c40d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2167,6 +2167,7 @@ version = "0.1.0" dependencies = [ "html5ever", "regex", + "urlencoding", ] [[package]] @@ -5826,6 +5827,12 @@ dependencies = [ ] [[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + +[[package]] name = "utf-8" version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/src/tools/linkchecker/Cargo.toml b/src/tools/linkchecker/Cargo.toml index fb5bff3fe63..f0886e31b24 100644 --- a/src/tools/linkchecker/Cargo.toml +++ b/src/tools/linkchecker/Cargo.toml @@ -10,3 +10,4 @@ path = "main.rs" [dependencies] regex = "1" html5ever = "0.29.0" +urlencoding = "2.1.3" diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 1dc45728c90..e07a0784cdb 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -232,18 +232,7 @@ enum FileEntry { type Cache = HashMap<String, FileEntry>; fn small_url_encode(s: &str) -> String { - s.replace('<', "%3C") - .replace('>', "%3E") - .replace(' ', "%20") - .replace('?', "%3F") - .replace('\'', "%27") - .replace('&', "%26") - .replace(',', "%2C") - .replace(':', "%3A") - .replace(';', "%3B") - .replace('[', "%5B") - .replace(']', "%5D") - .replace('\"', "%22") + urlencoding::encode(s).to_string() } impl Checker { diff --git a/src/tools/linkchecker/tests/valid/inner/bar.html b/src/tools/linkchecker/tests/valid/inner/bar.html index 4b500d78b76..6ffda259c40 100644 --- a/src/tools/linkchecker/tests/valid/inner/bar.html +++ b/src/tools/linkchecker/tests/valid/inner/bar.html @@ -3,5 +3,8 @@ <h2 id="barfrag">Bar</h2> + <!-- testing urlecoded anchor link against a non-urlencoded heading IDs --> + <h2 id="barfrag-è">Bar</h2> + </body> </html> diff --git 
a/src/tools/linkchecker/tests/valid/inner/foo.html b/src/tools/linkchecker/tests/valid/inner/foo.html index 3c6a7483bcd..f30bf718205 100644 --- a/src/tools/linkchecker/tests/valid/inner/foo.html +++ b/src/tools/linkchecker/tests/valid/inner/foo.html @@ -8,7 +8,15 @@ <a href="https://example.com/doesnotexist">external links not validated</a> <a href="redir.html#redirfrag">Redirect</a> + <!-- testing urlecoded anchor link against a non-urlencoded heading IDs --> + <a href="#localfrag-%C3%A8"></a> + <a href="bar.html#barfrag-%C3%A8"></a> + <a href="redir.html#redirfrag-%C3%A8"></a> + <h2 id="localfrag">Local</h2> + <!-- testing urlecoded anchor link against a non-urlencoded heading IDs --> + <h2 id="localfrag-è">Local</h2> + </body> </html> diff --git a/src/tools/linkchecker/tests/valid/inner/redir-target.html b/src/tools/linkchecker/tests/valid/inner/redir-target.html index bd59884a01e..ac1dec6d5b4 100644 --- a/src/tools/linkchecker/tests/valid/inner/redir-target.html +++ b/src/tools/linkchecker/tests/valid/inner/redir-target.html @@ -1,5 +1,8 @@ <html> <body> <h2 id="redirfrag">Redir</h2> + + <!-- testing urlecoded anchor link against a non-urlencoded heading IDs --> + <h2 id="redirfrag-è">Redir</h2> </body> </html> |
