about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMichael Howell <michael@notriddle.com>2023-02-03 17:36:27 -0700
committerMichael Howell <michael@notriddle.com>2023-02-03 17:58:26 -0700
commit5f98a7f00e338c0985e7743be5b23dbd8f1039b3 (patch)
tree93bf5c211f244b20af8f2336127b63fcbe9e704b
parent658fad6c5506f41c35b64fb1a22ceb0992697ff3 (diff)
downloadrust-5f98a7f00e338c0985e7743be5b23dbd8f1039b3.tar.gz
rust-5f98a7f00e338c0985e7743be5b23dbd8f1039b3.zip
rustdoc: use the same URL escape rules for fragments as for examples
-rw-r--r--src/librustdoc/html/markdown.rs43
-rw-r--r--src/librustdoc/html/render/mod.rs75
-rw-r--r--tests/rustdoc/const-generics/const-generics-docs.rs6
-rw-r--r--tests/rustdoc/const-generics/const-impl.rs10
-rw-r--r--tests/rustdoc/double-quote-escape.rs2
-rw-r--r--tests/rustdoc/primitive-tuple-variadic.rs4
-rw-r--r--tests/rustdoc/sidebar-links-to-foreign-impl.rs4
-rw-r--r--tests/rustdoc/where-clause-order.rs2
8 files changed, 68 insertions, 78 deletions
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs
index 00e3f859bfc..03382aeeb73 100644
--- a/src/librustdoc/html/markdown.rs
+++ b/src/librustdoc/html/markdown.rs
@@ -46,6 +46,7 @@ use crate::html::escape::Escape;
 use crate::html::format::Buffer;
 use crate::html::highlight;
 use crate::html::length_limit::HtmlWithLimit;
+use crate::html::render::small_url_encode;
 use crate::html::toc::TocBuilder;
 
 use pulldown_cmark::{
@@ -294,47 +295,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
                 doctest::make_test(&test, krate, false, &Default::default(), edition, None);
             let channel = if test.contains("#![feature(") { "&amp;version=nightly" } else { "" };
 
-            // These characters don't need to be escaped in a URI.
-            // See https://url.spec.whatwg.org/#query-percent-encode-set
-            // and https://url.spec.whatwg.org/#urlencoded-parsing
-            // and https://url.spec.whatwg.org/#url-code-points
-            fn dont_escape(c: u8) -> bool {
-                (b'a' <= c && c <= b'z')
-                    || (b'A' <= c && c <= b'Z')
-                    || (b'0' <= c && c <= b'9')
-                    || c == b'-'
-                    || c == b'_'
-                    || c == b'.'
-                    || c == b','
-                    || c == b'~'
-                    || c == b'!'
-                    || c == b'\''
-                    || c == b'('
-                    || c == b')'
-                    || c == b'*'
-                    || c == b'/'
-                    || c == b';'
-                    || c == b':'
-                    || c == b'?'
-                    // As described in urlencoded-parsing, the
-                    // first `=` is the one that separates key from
-                    // value. Following `=`s are part of the value.
-                    || c == b'='
-            }
-            let mut test_escaped = String::new();
-            for b in test.bytes() {
-                if dont_escape(b) {
-                    test_escaped.push(char::from(b));
-                } else if b == b' ' {
-                    // URL queries are decoded with + replaced with SP
-                    test_escaped.push('+');
-                } else if b == b'%' {
-                    test_escaped.push('%');
-                    test_escaped.push('%');
-                } else {
-                    write!(test_escaped, "%{:02X}", b).unwrap();
-                }
-            }
+            let test_escaped = small_url_encode(test);
             Some(format!(
                 r#"<a class="test-arrow" target="_blank" href="{}?code={}{}&amp;edition={}">Run</a>"#,
                 url, test_escaped, channel, edition,
diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs
index 816a8f4e274..fa22c461205 100644
--- a/src/librustdoc/html/render/mod.rs
+++ b/src/librustdoc/html/render/mod.rs
@@ -38,7 +38,7 @@ pub(crate) use self::span_map::{collect_spans_and_sources, LinkFromSrc};
 
 use std::collections::VecDeque;
 use std::default::Default;
-use std::fmt;
+use std::fmt::{self, Write};
 use std::fs;
 use std::iter::Peekable;
 use std::path::PathBuf;
@@ -2020,31 +2020,60 @@ fn get_associated_constants(
         .collect::<Vec<_>>()
 }
 
-// The point is to url encode any potential character from a type with genericity.
-fn small_url_encode(s: String) -> String {
+pub(crate) fn small_url_encode(s: String) -> String {
+    // These characters don't need to be escaped in a URI.
+    // See https://url.spec.whatwg.org/#query-percent-encode-set
+    // and https://url.spec.whatwg.org/#urlencoded-parsing
+    // and https://url.spec.whatwg.org/#url-code-points
+    fn dont_escape(c: u8) -> bool {
+        (b'a' <= c && c <= b'z')
+            || (b'A' <= c && c <= b'Z')
+            || (b'0' <= c && c <= b'9')
+            || c == b'-'
+            || c == b'_'
+            || c == b'.'
+            || c == b','
+            || c == b'~'
+            || c == b'!'
+            || c == b'\''
+            || c == b'('
+            || c == b')'
+            || c == b'*'
+            || c == b'/'
+            || c == b';'
+            || c == b':'
+            || c == b'?'
+            // As described in urlencoded-parsing, the
+            // first `=` is the one that separates key from
+            // value. Following `=`s are part of the value.
+            || c == b'='
+    }
     let mut st = String::new();
     let mut last_match = 0;
-    for (idx, c) in s.char_indices() {
-        let escaped = match c {
-            '<' => "%3C",
-            '>' => "%3E",
-            ' ' => "%20",
-            '?' => "%3F",
-            '\'' => "%27",
-            '&' => "%26",
-            ',' => "%2C",
-            ':' => "%3A",
-            ';' => "%3B",
-            '[' => "%5B",
-            ']' => "%5D",
-            '"' => "%22",
-            _ => continue,
-        };
+    for (idx, b) in s.bytes().enumerate() {
+        if dont_escape(b) {
+            continue;
+        }
 
-        st += &s[last_match..idx];
-        st += escaped;
-        // NOTE: we only expect single byte characters here - which is fine as long as we
-        // only match single byte characters
+        if last_match != idx {
+            // Invariant: `idx` must be the first byte in a character at this point.
+            st += &s[last_match..idx];
+        }
+        if b == b' ' {
+            // URL queries are decoded with + replaced with SP.
+            // While the same is not true for hashes, rustdoc only needs to be
+            // consistent with itself when encoding them.
+            st += "+";
+        } else if b == b'%' {
+            st += "%%";
+        } else {
+            write!(st, "%{:02X}", b).unwrap();
+        }
+        // Invariant: if the current byte is not at the start of a multi-byte character,
+        // we need to get down here so that when the next turn of the loop comes around,
+        // last_match winds up equalling idx.
+        //
+        // In other words, dont_escape must always return `false` in multi-byte character.
         last_match = idx + 1;
     }
 
diff --git a/tests/rustdoc/const-generics/const-generics-docs.rs b/tests/rustdoc/const-generics/const-generics-docs.rs
index ade70bbe80d..7e27ef8d8e5 100644
--- a/tests/rustdoc/const-generics/const-generics-docs.rs
+++ b/tests/rustdoc/const-generics/const-generics-docs.rs
@@ -21,8 +21,8 @@ pub use extern_crate::WTrait;
 //      'pub trait Trait<const N: usize>'
 // @has - '//*[@id="impl-Trait%3C1%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<1> for u8'
 // @has - '//*[@id="impl-Trait%3C2%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<2> for u8'
-// @has - '//*[@id="impl-Trait%3C{1%20+%202}%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<{1 + 2}> for u8'
-// @has - '//*[@id="impl-Trait%3CN%3E-for-%5Bu8%3B%20N%5D"]//h3[@class="code-header"]' \
+// @has - '//*[@id="impl-Trait%3C%7B1+%2B+2%7D%3E-for-u8"]//h3[@class="code-header"]' 'impl Trait<{1 + 2}> for u8'
+// @has - '//*[@id="impl-Trait%3CN%3E-for-%5Bu8;+N%5D"]//h3[@class="code-header"]' \
 //      'impl<const N: usize> Trait<N> for [u8; N]'
 pub trait Trait<const N: usize> {}
 impl Trait<1> for u8 {}
@@ -47,7 +47,7 @@ impl<const M: usize> Foo<M> where u8: Trait<M> {
     }
 }
 
-// @has foo/struct.Bar.html '//*[@id="impl-Bar%3Cu8%2C%20M%3E"]/h3[@class="code-header"]' 'impl<const M: usize> Bar<u8, M>'
+// @has foo/struct.Bar.html '//*[@id="impl-Bar%3Cu8,+M%3E"]/h3[@class="code-header"]' 'impl<const M: usize> Bar<u8, M>'
 impl<const M: usize> Bar<u8, M> {
     // @has - '//*[@id="method.hey"]' \
     //      'pub fn hey<const N: usize>(&self) -> Foo<N>where u8: Trait<N>'
diff --git a/tests/rustdoc/const-generics/const-impl.rs b/tests/rustdoc/const-generics/const-impl.rs
index 91866b7d890..152b643bf4b 100644
--- a/tests/rustdoc/const-generics/const-impl.rs
+++ b/tests/rustdoc/const-generics/const-impl.rs
@@ -9,20 +9,20 @@ pub enum Order {
 }
 
 // @has foo/struct.VSet.html '//pre[@class="rust item-decl"]' 'pub struct VSet<T, const ORDER: Order>'
-// @has foo/struct.VSet.html '//*[@id="impl-Send-for-VSet%3CT%2C%20ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Send for VSet<T, ORDER>'
-// @has foo/struct.VSet.html '//*[@id="impl-Sync-for-VSet%3CT%2C%20ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Sync for VSet<T, ORDER>'
+// @has foo/struct.VSet.html '//*[@id="impl-Send-for-VSet%3CT,+ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Send for VSet<T, ORDER>'
+// @has foo/struct.VSet.html '//*[@id="impl-Sync-for-VSet%3CT,+ORDER%3E"]/h3[@class="code-header"]' 'impl<T, const ORDER: Order> Sync for VSet<T, ORDER>'
 pub struct VSet<T, const ORDER: Order> {
     inner: Vec<T>,
 }
 
-// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT%2C%20{%20Order%3A%3ASorted%20}%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Sorted }>'
+// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT,+%7B+Order::Sorted+%7D%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Sorted }>'
 impl<T> VSet<T, { Order::Sorted }> {
     pub fn new() -> Self {
         Self { inner: Vec::new() }
     }
 }
 
-// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT%2C%20{%20Order%3A%3AUnsorted%20}%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Unsorted }>'
+// @has foo/struct.VSet.html '//*[@id="impl-VSet%3CT,+%7B+Order::Unsorted+%7D%3E"]/h3[@class="code-header"]' 'impl<T> VSet<T, { Order::Unsorted }>'
 impl<T> VSet<T, { Order::Unsorted }> {
     pub fn new() -> Self {
         Self { inner: Vec::new() }
@@ -31,7 +31,7 @@ impl<T> VSet<T, { Order::Unsorted }> {
 
 pub struct Escape<const S: &'static str>;
 
-// @has foo/struct.Escape.html '//*[@id="impl-Escape%3Cr#%22%3Cscript%3Ealert(%22Escape%22)%3B%3C/script%3E%22#%3E"]/h3[@class="code-header"]' 'impl Escape<r#"<script>alert("Escape");</script>"#>'
+// @has foo/struct.Escape.html '//*[@id="impl-Escape%3Cr%23%22%3Cscript%3Ealert(%22Escape%22);%3C/script%3E%22%23%3E"]/h3[@class="code-header"]' 'impl Escape<r#"<script>alert("Escape");</script>"#>'
 impl Escape<r#"<script>alert("Escape");</script>"#> {
     pub fn f() {}
 }
diff --git a/tests/rustdoc/double-quote-escape.rs b/tests/rustdoc/double-quote-escape.rs
index 350c897417d..4f4436377a0 100644
--- a/tests/rustdoc/double-quote-escape.rs
+++ b/tests/rustdoc/double-quote-escape.rs
@@ -7,5 +7,5 @@ pub trait Foo<T> {
 pub struct Bar;
 
 // @has foo/struct.Bar.html
-// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo%3Cunsafe%20extern%20%22C%22%20fn()%3E-for-Bar"]' 'Foo<unsafe extern "C" fn()>'
+// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo%3Cunsafe+extern+%22C%22+fn()%3E-for-Bar"]' 'Foo<unsafe extern "C" fn()>'
 impl Foo<unsafe extern "C" fn()> for Bar {}
diff --git a/tests/rustdoc/primitive-tuple-variadic.rs b/tests/rustdoc/primitive-tuple-variadic.rs
index db7cfd60c71..846028bbb19 100644
--- a/tests/rustdoc/primitive-tuple-variadic.rs
+++ b/tests/rustdoc/primitive-tuple-variadic.rs
@@ -6,13 +6,13 @@
 pub trait Foo {}
 
 // @has foo/trait.Foo.html
-// @has - '//section[@id="impl-Foo-for-(T%2C)"]/h3' 'impl<T> Foo for (T₁, T₂, …, Tₙ)'
+// @has - '//section[@id="impl-Foo-for-(T,)"]/h3' 'impl<T> Foo for (T₁, T₂, …, Tₙ)'
 #[doc(fake_variadic)]
 impl<T> Foo for (T,) {}
 
 pub trait Bar {}
 
 // @has foo/trait.Bar.html
-// @has - '//section[@id="impl-Bar-for-(U%2C)"]/h3' 'impl<U: Foo> Bar for (U₁, U₂, …, Uₙ)'
+// @has - '//section[@id="impl-Bar-for-(U,)"]/h3' 'impl<U: Foo> Bar for (U₁, U₂, …, Uₙ)'
 #[doc(fake_variadic)]
 impl<U: Foo> Bar for (U,) {}
diff --git a/tests/rustdoc/sidebar-links-to-foreign-impl.rs b/tests/rustdoc/sidebar-links-to-foreign-impl.rs
index 11e94694802..caa17dfbb1c 100644
--- a/tests/rustdoc/sidebar-links-to-foreign-impl.rs
+++ b/tests/rustdoc/sidebar-links-to-foreign-impl.rs
@@ -7,8 +7,8 @@
 // @has - '//h2[@id="foreign-impls"]' 'Implementations on Foreign Types'
 // @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo-for-u32"]' 'u32'
 // @has - '//*[@id="impl-Foo-for-u32"]//h3[@class="code-header"]' 'impl Foo for u32'
-// @has - '//*[@class="sidebar-elems"]//section//a[@href="#impl-Foo-for-%26%27a%20str"]' "&'a str"
-// @has - '//*[@id="impl-Foo-for-%26%27a%20str"]//h3[@class="code-header"]' "impl<'a> Foo for &'a str"
+// @has - "//*[@class=\"sidebar-elems\"]//section//a[@href=\"#impl-Foo-for-%26'a+str\"]" "&'a str"
+// @has - "//*[@id=\"impl-Foo-for-%26'a+str\"]//h3[@class=\"code-header\"]" "impl<'a> Foo for &'a str"
 pub trait Foo {}
 
 impl Foo for u32 {}
diff --git a/tests/rustdoc/where-clause-order.rs b/tests/rustdoc/where-clause-order.rs
index b8502e10a48..b10f8f6856e 100644
--- a/tests/rustdoc/where-clause-order.rs
+++ b/tests/rustdoc/where-clause-order.rs
@@ -7,7 +7,7 @@ where
 }
 
 // @has 'foo/trait.SomeTrait.html'
-// @has - "//*[@id='impl-SomeTrait%3C(A%2C%20B%2C%20C%2C%20D%2C%20E)%3E-for-(A%2C%20B%2C%20C%2C%20D%2C%20E)']/h3" "impl<A, B, C, D, E> SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)where A: PartialOrd<A> + PartialEq<A>, B: PartialOrd<B> + PartialEq<B>, C: PartialOrd<C> + PartialEq<C>, D: PartialOrd<D> + PartialEq<D>, E: PartialOrd<E> + PartialEq<E> + ?Sized, "
+// @has - "//*[@id='impl-SomeTrait%3C(A,+B,+C,+D,+E)%3E-for-(A,+B,+C,+D,+E)']/h3" "impl<A, B, C, D, E> SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)where A: PartialOrd<A> + PartialEq<A>, B: PartialOrd<B> + PartialEq<B>, C: PartialOrd<C> + PartialEq<C>, D: PartialOrd<D> + PartialEq<D>, E: PartialOrd<E> + PartialEq<E> + ?Sized, "
 impl<A, B, C, D, E> SomeTrait<(A, B, C, D, E)> for (A, B, C, D, E)
 where
     A: PartialOrd<A> + PartialEq<A>,